diff --git a/esphome/components/runtime_stats/runtime_stats.h b/esphome/components/runtime_stats/runtime_stats.h index 888d48e672..1e4910453a 100644 --- a/esphome/components/runtime_stats/runtime_stats.h +++ b/esphome/components/runtime_stats/runtime_stats.h @@ -47,7 +47,7 @@ class RuntimeStatsCollector { // overhead between Phase A and stats belongs to "residual"). // Residual overhead at log time = active − Σ(component) − before − tail, // which captures per-iteration inter-component bookkeeping (set_current_component, - // WarnIfComponentBlockingGuard construction/destruction, feed_wdt_with_time calls, + // LoopBlockingGuard construction/destruction, feed_wdt_with_time calls, // the for-loop itself). void record_loop_active(uint32_t active_us, uint32_t before_us, uint32_t tail_us) { this->period_active_count_++; diff --git a/esphome/components/script/script.h b/esphome/components/script/script.h index 847fab02bd..6cd33e566c 100644 --- a/esphome/components/script/script.h +++ b/esphome/components/script/script.h @@ -3,6 +3,7 @@ #include #include #include +#include "esphome/core/application.h" #include "esphome/core/automation.h" #include "esphome/core/component.h" #include "esphome/core/helpers.h" @@ -57,6 +58,14 @@ template class Script : public ScriptLogger, public Triggerexecute(std::get(tuple)...); } + // Run the action chain with this script's name published as the current source (RAII save/restore, + // so nesting composes), so deferred work inside the script is attributed to it in blocking + // warnings. Force-inlined to fold into the always-inlined trigger chain (no extra stack frame). + inline void run_actions_(const Ts &...x) ESPHOME_ALWAYS_INLINE { + ScopedSourceGuard source_guard{this->name_}; + this->trigger(x...); + } + const LogString *name_{nullptr}; }; @@ -74,7 +83,7 @@ template class SingleScript : public Script { return; } - this->trigger(x...); + this->run_actions_(x...); } }; @@ -91,7 +100,7 @@ template class RestartScript : public Script { this->stop_action(); } - this->trigger(x...); + this->run_actions_(x...); } }; @@ -136,7 +145,7 @@ template class QueueingScript : public Script, public Com return; } - this->trigger(x...); + this->run_actions_(x...); // Check if the trigger was immediate and we can continue right away. this->loop(); } @@ -175,7 +184,7 @@ template class QueueingScript : public Script, public Com } template void trigger_tuple_(const std::tuple &tuple, std::index_sequence /*unused*/) { - this->trigger(std::get(tuple)...); + this->run_actions_(std::get(tuple)...); } int num_queued_ = 0; // Number of queued instances (not including currently running) @@ -197,7 +206,7 @@ template class ParallelScript : public Script { LOG_STR_ARG(this->name_)); return; } - this->trigger(x...); + this->run_actions_(x...); } void set_max_runs(int max_runs) { max_runs_ = max_runs; } diff --git a/esphome/core/application.h b/esphome/core/application.h index 369c970d46..7c12a66b2c 100644 --- a/esphome/core/application.h +++ b/esphome/core/application.h @@ -104,9 +104,13 @@ class Application { void register_area(Area *area) { this->areas_.push_back(area); } #endif - void set_current_component(Component *component) { this->current_component_ = component; } Component *get_current_component() { return this->current_component_; } + // Owning script of the action chain currently executing (nullptr when none); used to attribute + // blocking warnings for deferred work to the script that scheduled it. + void set_current_source(const LogString *source) { this->current_source_ = source; } + const LogString *get_current_source() { return this->current_source_; } + // Entity register methods (generated from entity_types.h). // Each entity type gets two overloads: // - register_(obj) — bare push_back @@ -393,6 +397,7 @@ class Application { protected: friend Component; friend class Scheduler; + friend class LoopBlockingGuard; #ifdef USE_RUNTIME_STATS friend class runtime_stats::RuntimeStatsCollector; #endif @@ -402,6 +407,14 @@ class Application { /// Freshen the cached loop component start time. Called by Scheduler before each dispatch. void set_loop_component_start_time_(uint32_t now) { this->loop_component_start_time_ = now; } + // Publish the running unit's identity (component + source) and dispatch time together, so a + // dispatch site can't set one without the others. Friend-only (Scheduler). + void set_current_execution_context_(Component *component, const LogString *source, uint32_t now) { + this->current_component_ = component; + this->current_source_ = source; + this->set_loop_component_start_time_(now); + } + /// Walk all registered components looking for any whose component_state_ /// has the given flag set. Used by Component::status_clear_*_slow_path_() /// (which is a friend) to decide whether to clear the corresponding bit on @@ -482,6 +495,7 @@ class Application { // Pointer-sized members first Component *current_component_{nullptr}; + const LogString *current_source_{nullptr}; // std::vector (3 pointers each: begin, end, capacity) // Partitioned vector design for looping components @@ -554,6 +568,76 @@ class Application { /// Global storage of Application pointer - only one Application can exist. extern Application App; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) +/// RAII guard that publishes a current source (e.g. a script name) for a scope and restores the +/// previous value on exit, attributing deferred work scheduled inside to that source. +class ScopedSourceGuard { + public: + explicit ScopedSourceGuard(const LogString *source) : prev_(App.get_current_source()) { + App.set_current_source(source); + } + ~ScopedSourceGuard() { App.set_current_source(this->prev_); } + ScopedSourceGuard(const ScopedSourceGuard &) = delete; + ScopedSourceGuard &operator=(const ScopedSourceGuard &) = delete; + + private: + const LogString *prev_; +}; + +// Times one unit of work (a component loop() or a scheduled callback) and warns if it blocks the +// main loop too long. The constructor publishes the unit's identity + dispatch time to App; +// finish()/the cold warning path read them back, so the guard stores no copy. +// +// Guards must not nest: the constructor publishes to App but never restores on destruction, so a +// nested guard would clobber the outer's context. Safe because the two dispatch sites (component +// loop phase, execute_item_) run strictly sequentially and aren't re-entered from a timed callback. +class LoopBlockingGuard { + public: + // Publish the unit's identity + dispatch time, then start timing. The millis start lives in App, + // so only the runtime-stats micros stamp is kept here. + LoopBlockingGuard(Component *component, const LogString *source, uint32_t now) { + App.set_current_execution_context_(component, source, now); +#ifdef USE_RUNTIME_STATS + this->started_us_ = micros(); +#endif + } + + // Finish the timing operation and return the current time (millis) + // Inlined: the fast path is just millis() + subtract + compare + inline uint32_t HOT finish() { +#ifdef USE_RUNTIME_STATS + uint32_t elapsed_us = micros() - this->started_us_; + // Delays have no component; accumulate into the global counter so loop() can subtract them. + Component *component = App.get_current_component(); + if (component != nullptr) { + component->runtime_stats_.record_time(elapsed_us); + } else { + ComponentRuntimeStats::global_recorded_us += elapsed_us; + } +#endif + uint32_t curr_time = MillisInternal::get(); +#ifndef USE_BENCHMARK + // Fast path: compare against constant threshold in ms (computed at compile time from centiseconds) + static constexpr uint32_t WARN_IF_BLOCKING_OVER_MS = static_cast(WARN_IF_BLOCKING_OVER_CS) * 10U; + uint32_t blocking_time = curr_time - App.get_loop_component_start_time(); + if (blocking_time > WARN_IF_BLOCKING_OVER_MS) [[unlikely]] { + warn_blocking(blocking_time); + } +#endif + return curr_time; + } + + ~LoopBlockingGuard() = default; + +#ifdef USE_RUNTIME_STATS + protected: + uint32_t started_us_; +#endif + + private: + // Cold path; defined in component.cpp. Reads the current component/source from App to name the culprit. + static void __attribute__((noinline, cold)) warn_blocking(uint32_t blocking_time); +}; + // Phase A: drain wake notifications and run the scheduler. Invoked on every // Application::loop() tick regardless of whether a component phase runs, so // scheduler items fire at their requested cadence even when the caller has @@ -607,7 +691,7 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() { // before/tail splits recorded below. uint32_t loop_active_start_us = micros(); // Snapshot the cumulative component-recorded time so we can subtract the - // slice that the scheduler spends inside its own WarnIfComponentBlockingGuard + // slice that the scheduler spends inside its own LoopBlockingGuard // (scheduler.cpp) — that time is already counted in per-component stats, // so charging it again to "before" would double-count. uint64_t loop_recorded_snap = ComponentRuntimeStats::global_recorded_us; @@ -660,12 +744,9 @@ inline void ESPHOME_ALWAYS_INLINE Application::loop() { this->current_loop_index_++) { Component *component = this->looping_components_[this->current_loop_index_]; - // Update the cached time before each component runs - this->loop_component_start_time_ = last_op_end_time; - { - this->set_current_component(component); - WarnIfComponentBlockingGuard guard{component, last_op_end_time}; + // Guard publishes this component (no script source) + dispatch time, then times loop(). + LoopBlockingGuard guard{component, nullptr, last_op_end_time}; component->loop(); // Use the finish method to get the current time as the end time last_op_end_time = guard.finish(); diff --git a/esphome/core/base_automation.h b/esphome/core/base_automation.h index dcad7c9d2e..cf8b05a300 100644 --- a/esphome/core/base_automation.h +++ b/esphome/core/base_automation.h @@ -201,7 +201,10 @@ template class DelayAction : public Action { /* component= */ nullptr, Scheduler::SchedulerItem::TIMEOUT, Scheduler::NameType::SELF_POINTER, /* static_name= */ reinterpret_cast(this), /* hash_or_id= */ 0, this->delay_.value(), [this]() { this->play_next_(); }, - /* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1); + /* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1, + // Record the owning script (if any) so the blocking warning can name it; propagates across + // chained delays via the scheduler. + /* source= */ App.get_current_source()); } else { // For delays with arguments, capture by value to preserve argument values // Arguments must be copied because original references may be invalid after delay @@ -212,7 +215,9 @@ template class DelayAction : public Action { /* component= */ nullptr, Scheduler::SchedulerItem::TIMEOUT, Scheduler::NameType::SELF_POINTER, /* static_name= */ reinterpret_cast(this), /* hash_or_id= */ 0, this->delay_.value(x...), std::move(f), - /* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1); + /* is_retry= */ false, /* skip_cancel= */ this->num_running_ > 1, + // See the no-argument branch above: record the owning script for log attribution. + /* source= */ App.get_current_source()); } } diff --git a/esphome/core/component.cpp b/esphome/core/component.cpp index 2d80301897..7ef5ff50a5 100644 --- a/esphome/core/component.cpp +++ b/esphome/core/component.cpp @@ -258,9 +258,11 @@ void Component::call() { break; } } -bool Component::should_warn_of_blocking(uint32_t blocking_time) { +bool Component::should_warn_of_blocking(uint32_t blocking_time, uint32_t &threshold_ms_out) { // Convert centisecond threshold to milliseconds for comparison uint32_t threshold_ms = static_cast(this->warn_if_blocking_over_) * 10U; + // Report the threshold that was exceeded (before any ratcheting below) so the warning is accurate. + threshold_ms_out = threshold_ms; if (blocking_time > threshold_ms) { // Set new threshold: blocking_time + increment, converted back to centiseconds uint32_t new_threshold_ms = blocking_time + WARN_IF_BLOCKING_INCREMENT_MS; @@ -491,19 +493,25 @@ uint32_t PollingComponent::get_update_interval() const { return this->update_int uint64_t ComponentRuntimeStats::global_recorded_us = 0; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) #endif -void __attribute__((noinline, cold)) -WarnIfComponentBlockingGuard::warn_blocking(Component *component, uint32_t blocking_time) { - bool should_warn; +void __attribute__((noinline, cold)) LoopBlockingGuard::warn_blocking(uint32_t blocking_time) { + // Identity is published on App by the caller before the guard is built; read it back here. + Component *component = App.get_current_component(); + // Component-less path always warns (the caller already checked the constant threshold). + uint32_t threshold_ms = WARN_IF_BLOCKING_OVER_MS; + if (component != nullptr && !component->should_warn_of_blocking(blocking_time, threshold_ms)) { + return; // Component's (possibly ratcheted) threshold not exceeded yet + } + // Component name if any, else the published source (owning script), else a generic label. + const LogString *name; if (component != nullptr) { - should_warn = component->should_warn_of_blocking(blocking_time); + name = component->get_component_log_str(); } else { - should_warn = true; // Already checked > WARN_IF_BLOCKING_OVER_MS in caller - } - if (should_warn) { - ESP_LOGW(TAG, "%s took a long time for an operation (%" PRIu32 " ms), max is 30 ms", - component == nullptr ? LOG_STR_LITERAL("") : LOG_STR_ARG(component->get_component_log_str()), - blocking_time); + name = App.get_current_source(); + if (name == nullptr) + name = LOG_STR("a scheduled task"); } + ESP_LOGW(TAG, "%s took a long time for an operation (%" PRIu32 " ms), max is %" PRIu32 " ms", LOG_STR_ARG(name), + blocking_time, threshold_ms); } #ifdef USE_SETUP_PRIORITY_OVERRIDE diff --git a/esphome/core/component.h b/esphome/core/component.h index ff10f1a8f1..299a5f72ea 100644 --- a/esphome/core/component.h +++ b/esphome/core/component.h @@ -118,7 +118,7 @@ struct ComponentRuntimeStats { // Cumulative sum of every record_time() duration since boot, across all // components. Used by Application::loop() to snapshot time spent inside - // WarnIfComponentBlockingGuard (including guards constructed by the + // LoopBlockingGuard (including guards constructed by the // scheduler at scheduler.cpp) so main-loop overhead accounting can // subtract scheduled-callback time from the before_loop_tasks_ wall time. static uint64_t global_recorded_us; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) @@ -326,7 +326,7 @@ class Component { return component_source_lookup(this->component_source_index_); } - bool should_warn_of_blocking(uint32_t blocking_time); + bool should_warn_of_blocking(uint32_t blocking_time, uint32_t &threshold_ms_out); protected: friend class Application; @@ -571,7 +571,7 @@ class Component { volatile bool pending_enable_loop_{false}; ///< ISR-safe flag for enable_loop_soon_any_context #ifdef USE_RUNTIME_STATS friend class runtime_stats::RuntimeStatsCollector; - friend class WarnIfComponentBlockingGuard; + friend class LoopBlockingGuard; ComponentRuntimeStats runtime_stats_; #endif }; @@ -619,59 +619,7 @@ class PollingComponent : public Component { uint32_t update_interval_; }; -// millis() and micros() are available via hal.h - -class WarnIfComponentBlockingGuard { - public: - WarnIfComponentBlockingGuard(Component *component, uint32_t start_time) - : started_(start_time), - component_(component) -#ifdef USE_RUNTIME_STATS - , - started_us_(micros()) -#endif - { - } - - // Finish the timing operation and return the current time (millis) - // Inlined: the fast path is just millis() + subtract + compare - inline uint32_t HOT finish() { -#ifdef USE_RUNTIME_STATS - uint32_t elapsed_us = micros() - this->started_us_; - // component_ is nullptr for self-keyed scheduler items (set_timeout/set_interval(self, ...)) - if (this->component_ != nullptr) { - this->component_->runtime_stats_.record_time(elapsed_us); - } else { - // Still accumulate into the global counter so Application::loop() can subtract - // this time from before_loop_tasks_ wall time. - ComponentRuntimeStats::global_recorded_us += elapsed_us; - } -#endif - uint32_t curr_time = MillisInternal::get(); -#ifndef USE_BENCHMARK - // Fast path: compare against constant threshold in ms (computed at compile time from centiseconds) - static constexpr uint32_t WARN_IF_BLOCKING_OVER_MS = static_cast(WARN_IF_BLOCKING_OVER_CS) * 10U; - uint32_t blocking_time = curr_time - this->started_; - if (blocking_time > WARN_IF_BLOCKING_OVER_MS) [[unlikely]] { - warn_blocking(this->component_, blocking_time); - } -#endif - return curr_time; - } - - ~WarnIfComponentBlockingGuard() = default; - - protected: - uint32_t started_; - Component *component_; -#ifdef USE_RUNTIME_STATS - uint32_t started_us_; -#endif - - private: - // Cold path for blocking warning - defined in component.cpp - static void __attribute__((noinline, cold)) warn_blocking(Component *component, uint32_t blocking_time); -}; +// LoopBlockingGuard lives in application.h because it reads its state from App. // Function to clear setup priority overrides after all components are set up // Only has an implementation when USE_SETUP_PRIORITY_OVERRIDE is defined diff --git a/esphome/core/millis_internal.h b/esphome/core/millis_internal.h index bc1d55a1c4..7297d22357 100644 --- a/esphome/core/millis_internal.h +++ b/esphome/core/millis_internal.h @@ -16,7 +16,7 @@ namespace esphome { // Friend-gated accessor for a fast millis() variant intended only for // known task-context callers on the main loop hot path (Application::loop() -// and WarnIfComponentBlockingGuard::finish()). It skips the ISR-context +// and LoopBlockingGuard::finish()). It skips the ISR-context // dispatch that the public esphome::millis() pays on ESP32 and libretiny. // // MUST NOT be called from ISR context: on ESP32 and libretiny it calls the @@ -50,7 +50,7 @@ class MillisInternal { #endif } friend class Application; - friend class WarnIfComponentBlockingGuard; + friend class LoopBlockingGuard; }; } // namespace esphome diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index a7c624486d..15bb9ea239 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -131,7 +131,8 @@ bool Scheduler::is_retry_cancelled_locked_(Component *component, NameType name_t // name_type determines storage type: STATIC_STRING uses static_name, others use hash_or_id void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type type, NameType name_type, const char *static_name, uint32_t hash_or_id, uint32_t delay, - std::function &&func, bool is_retry, bool skip_cancel) { + std::function &&func, bool is_retry, bool skip_cancel, + const LogString *source) { if (delay == SCHEDULER_DONT_RUN) { // Still need to cancel existing timer if we have a name/id if (!skip_cancel) { @@ -174,7 +175,12 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type // Create and populate the scheduler item SchedulerItem *item = this->get_item_from_pool_locked_(); - item->component = component; + // SELF_POINTER items store the source name (owning script) in the union slot instead of a component. + if (name_type == NameType::SELF_POINTER) { + item->source_name = source; + } else { + item->component = component; + } item->set_name(name_type, static_name, hash_or_id); item->type = type; // Use destroy + placement-new instead of move-assignment. @@ -642,8 +648,8 @@ uint32_t HOT Scheduler::call(uint32_t now) { // Not reached timeout yet, done for this call break; } - // Don't run on failed components - if (item->component != nullptr && item->component->is_failed()) { + // Don't run on failed components (is_item_failed_ exempts SELF_POINTER delays). + if (this->is_item_failed_(item)) { LockGuard guard{this->lock_}; this->recycle_item_main_loop_(this->pop_raw_locked_()); continue; @@ -790,10 +796,21 @@ Scheduler::SchedulerItem *HOT Scheduler::pop_raw_locked_() { // Helper to execute a scheduler item uint32_t HOT Scheduler::execute_item_(SchedulerItem *item, uint32_t now) { - App.set_current_component(item->component); - // Freshen so callbacks reading App.get_loop_component_start_time() see this item's dispatch time. - App.set_loop_component_start_time_(now); - WarnIfComponentBlockingGuard guard{item->component, now}; + // Resolve the component and (for SELF_POINTER/deferred items) the source name from the shared + // union slot with a single name-type check. Self-keyed items have no owning component; their slot + // holds the source name (e.g. the owning script), published so deferred work chained inside the + // callback re-captures it and the blocking warning can name the script instead of "". + Component *component; + const LogString *source; + if (item->get_name_type() == NameType::SELF_POINTER) { + component = nullptr; + source = item->source_name; + } else { + component = item->component; + source = nullptr; + } + // Guard publishes the item's identity + dispatch time, then times the callback. + LoopBlockingGuard guard{component, source, now}; item->callback(); uint32_t end = guard.finish(); // Feed the watchdog after each scheduled item (both main heap and defer diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index b640aa86fe..378c0fb94b 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -183,11 +183,12 @@ class Scheduler { protected: struct SchedulerItem { - // Ordered by size to minimize padding. - // `component` while live; `next_free` while in scheduler_item_pool_head_ (mutually exclusive). + // Ordered by size to minimize padding. Mutually exclusive by state; read the component via + // get_component() so SELF_POINTER items read as component-less. union { - Component *component; - SchedulerItem *next_free; + Component *component; // live, non-SELF_POINTER: owning component + const LogString *source_name; // live SELF_POINTER: owning script name (log attribution) + SchedulerItem *next_free; // while pooled }; // Optimized name storage using tagged union - zero heap allocation union { @@ -302,14 +303,23 @@ class Scheduler { next_execution_high_ = static_cast(value >> 32); } constexpr const char *get_type_str() const { return (type == TIMEOUT) ? "timeout" : "interval"; } - const LogString *get_source() const { return component ? component->get_component_log_str() : LOG_STR("unknown"); } + // The owning component, or nullptr for SELF_POINTER items (whose slot holds source_name instead). + // All component access goes through this so SELF_POINTER items read as component-less. + Component *get_component() const { return name_type_ == NameType::SELF_POINTER ? nullptr : component; } + const LogString *get_source() const { + // Same no-source label as warn_blocking, for consistent log vocabulary. + if (name_type_ == NameType::SELF_POINTER) + return source_name != nullptr ? source_name : LOG_STR("a scheduled task"); + return component != nullptr ? component->get_component_log_str() : LOG_STR("unknown"); + } }; // Common implementation for both timeout and interval // name_type determines storage type: STATIC_STRING uses static_name, others use hash_or_id + // `source` is stored (in the union slot) only for SELF_POINTER items; ignored otherwise. void set_timer_common_(Component *component, SchedulerItem::Type type, NameType name_type, const char *static_name, uint32_t hash_or_id, uint32_t delay, std::function &&func, bool is_retry = false, - bool skip_cancel = false); + bool skip_cancel = false, const LogString *source = nullptr); // Common implementation for retry - Remove before 2026.8.0 // name_type determines storage type: STATIC_STRING uses static_name, others use hash_or_id @@ -402,8 +412,10 @@ class Scheduler { // Fixes: https://github.com/esphome/esphome/issues/11940 if (item == nullptr) return false; - if (item->component != component || item->type != type || (skip_removed && this->is_item_removed_locked_(item)) || - (match_retry && !item->is_retry)) { + // get_component() is nullptr for SELF_POINTER items (their cancels pass nullptr too), so they + // match by the `this` key alone. + if (item->get_component() != component || item->type != type || + (skip_removed && this->is_item_removed_locked_(item)) || (match_retry && !item->is_retry)) { return false; } // Name type must match @@ -423,11 +435,16 @@ class Scheduler { // Helper to execute a scheduler item uint32_t execute_item_(SchedulerItem *item, uint32_t now); - // Helper to check if item should be skipped - bool should_skip_item_(SchedulerItem *item) const { - return is_item_removed_(item) || (item->component != nullptr && item->component->is_failed()); + // True if the item's component is failed (so it must not run). SELF_POINTER delays have no + // component (get_component() == nullptr) and always fire. + bool is_item_failed_(SchedulerItem *item) const { + Component *component = item->get_component(); + return component != nullptr && component->is_failed(); } + // Helper to check if item should be skipped + bool should_skip_item_(SchedulerItem *item) const { return is_item_removed_(item) || this->is_item_failed_(item); } + // Helper to recycle a SchedulerItem back to the pool. // Takes a raw pointer — caller transfers ownership. The item is either added to the // pool or deleted if the pool is full. diff --git a/tests/integration/fixtures/scheduler_blocking_warning.yaml b/tests/integration/fixtures/scheduler_blocking_warning.yaml new file mode 100644 index 0000000000..594ec46afb --- /dev/null +++ b/tests/integration/fixtures/scheduler_blocking_warning.yaml @@ -0,0 +1,22 @@ +esphome: + name: scheduler-blocking-warning + on_boot: + then: + - script.execute: blocking_script + +host: +api: +logger: + level: DEBUG + +# The busy-block runs in the second delay's continuation; the warning must name the script. Two +# delays verify the source survives chained delays (the scheduler republishes it each continuation). +script: + - id: blocking_script + then: + - delay: 10ms + - delay: 10ms + - lambda: |- + const uint32_t start = millis(); + while (millis() - start < 80) { + } diff --git a/tests/integration/fixtures/scheduler_blocking_warning_generic_source.yaml b/tests/integration/fixtures/scheduler_blocking_warning_generic_source.yaml new file mode 100644 index 0000000000..2d8a62f25b --- /dev/null +++ b/tests/integration/fixtures/scheduler_blocking_warning_generic_source.yaml @@ -0,0 +1,30 @@ +esphome: + name: scheduler-blocking-generic + +host: +api: +logger: + level: DEBUG + +globals: + - id: done + type: bool + restore_value: false + initial_value: "false" + +# A delay in a plain (non-script) automation has no owning script, so the block must log the +# generic "a scheduled task" label, not a script name. +interval: + - interval: 100ms + id: gen_interval + then: + - if: + condition: + lambda: "return !id(done);" + then: + - lambda: "id(done) = true;" + - delay: 10ms + - lambda: |- + const uint32_t start = millis(); + while (millis() - start < 80) { + } diff --git a/tests/integration/fixtures/scheduler_delay_runs_on_failed_component.yaml b/tests/integration/fixtures/scheduler_delay_runs_on_failed_component.yaml new file mode 100644 index 0000000000..860fa00c37 --- /dev/null +++ b/tests/integration/fixtures/scheduler_delay_runs_on_failed_component.yaml @@ -0,0 +1,29 @@ +esphome: + name: scheduler-delay-failed + +host: +api: +logger: + level: DEBUG + +globals: + - id: started + type: bool + restore_value: false + initial_value: "false" + +# The interval marks itself failed, then schedules a delay. The delay must still fire: a failed +# component must not drop it, since the SELF_POINTER scheduler item has no owning component. +interval: + - interval: 100ms + id: host_interval + then: + - if: + condition: + lambda: "return !id(started);" + then: + - lambda: |- + id(started) = true; + id(host_interval)->mark_failed(); + - delay: 200ms + - logger.log: "DELAY_FIRED_AFTER_FAIL" diff --git a/tests/integration/test_scheduler_blocking_warning.py b/tests/integration/test_scheduler_blocking_warning.py new file mode 100644 index 0000000000..699a5bc746 --- /dev/null +++ b/tests/integration/test_scheduler_blocking_warning.py @@ -0,0 +1,120 @@ +"""Integration tests for blocking-warning source attribution. + +A blocking operation that runs inside a deferred scheduler continuation (e.g. after a ``delay`` +in a script) used to be reported as `` took a long time for an operation (NN ms), +max is 30 ms`` because the continuation carries no component. The warning should instead name +the owning script and report the real threshold (50 ms). +""" + +from __future__ import annotations + +import asyncio +import re + +import pytest + +from .types import APIClientConnectedFactory, RunCompiledFunction + +# Matches: " took a long time for an operation (NN ms), max is NN ms" +WARN_PATTERN = re.compile( + r"(\S+) took a long time for an operation \((\d+) ms\), max is (\d+) ms" +) + + +@pytest.mark.asyncio +async def test_scheduler_blocking_warning( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """Deferred blocking work inside a script is attributed to the script, not "".""" + loop = asyncio.get_running_loop() + warning_future: asyncio.Future[str] = loop.create_future() + + def check_output(line: str) -> None: + if WARN_PATTERN.search(line) and not warning_future.done(): + warning_future.set_result(line) + + async with ( + run_compiled(yaml_config, line_callback=check_output), + api_client_connected() as client, + ): + device_info = await client.device_info() + assert device_info is not None + + # on_boot runs the script, which defers via delay then busy-blocks > 50 ms in the + # continuation, tripping the blocking warning. + warning_line = await asyncio.wait_for(warning_future, timeout=10.0) + + # Must name the owning script, not "" and not the generic fallback. + assert "" not in warning_line, ( + f"Warning should name the script, got: {warning_line}" + ) + assert "a scheduled task" not in warning_line, ( + f"Warning should name the script, got: {warning_line}" + ) + match = WARN_PATTERN.search(warning_line) + assert match is not None + assert match.group(1) == "blocking_script", ( + f"Warning should name 'blocking_script', got: {warning_line}" + ) + # The reported threshold must be the real default (50 ms), not the stale "30 ms". + assert match.group(3) == "50", f"Expected 'max is 50 ms', got: {warning_line}" + + +@pytest.mark.asyncio +async def test_scheduler_blocking_warning_generic_source( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """A delay in a plain (non-script) automation logs the generic label, not a script name.""" + loop = asyncio.get_running_loop() + warning_future: asyncio.Future[str] = loop.create_future() + + def check_output(line: str) -> None: + if WARN_PATTERN.search(line) and not warning_future.done(): + warning_future.set_result(line) + + async with ( + run_compiled(yaml_config, line_callback=check_output), + api_client_connected() as client, + ): + assert await client.device_info() is not None + warning_line = await asyncio.wait_for(warning_future, timeout=10.0) + + assert "a scheduled task took a long time" in warning_line, ( + f"Non-script deferred work should log the generic label, got: {warning_line}" + ) + assert "" not in warning_line + match = WARN_PATTERN.search(warning_line) + assert match is not None and match.group(3) == "50", ( + f"Expected 'max is 50 ms', got: {warning_line}" + ) + + +@pytest.mark.asyncio +async def test_scheduler_delay_runs_on_failed_component( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """A delay must still fire even when its context component is marked failed. + + Deferred (SELF_POINTER) scheduler items have no owning component, so the scheduler's + failed-component skip must not drop them. + """ + loop = asyncio.get_running_loop() + fired: asyncio.Future[bool] = loop.create_future() + + def check_output(line: str) -> None: + if "DELAY_FIRED_AFTER_FAIL" in line and not fired.done(): + fired.set_result(True) + + async with ( + run_compiled(yaml_config, line_callback=check_output), + api_client_connected() as client, + ): + assert await client.device_info() is not None + # If the failed host component wrongly dropped the delay, this times out. + await asyncio.wait_for(fired, timeout=10.0)