From 892e116680a0c8063e7d1ead63b74ed7748c89f9 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Mon, 25 May 2026 12:42:49 -0400 Subject: [PATCH] [router] Add a router speaker component to runtime choose output speaker (#16592) --- CODEOWNERS | 1 + esphome/components/router/__init__.py | 0 esphome/components/router/speaker/__init__.py | 123 +++++++++ .../router/speaker/router_speaker.cpp | 236 ++++++++++++++++++ .../router/speaker/router_speaker.h | 92 +++++++ tests/components/router/common.yaml | 44 ++++ tests/components/router/test.esp32-idf.yaml | 7 + 7 files changed, 503 insertions(+) create mode 100644 esphome/components/router/__init__.py create mode 100644 esphome/components/router/speaker/__init__.py create mode 100644 esphome/components/router/speaker/router_speaker.cpp create mode 100644 esphome/components/router/speaker/router_speaker.h create mode 100644 tests/components/router/common.yaml create mode 100644 tests/components/router/test.esp32-idf.yaml diff --git a/CODEOWNERS b/CODEOWNERS index f8cdfdc6c6..3c3e502058 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -417,6 +417,7 @@ esphome/components/restart/* @esphome/core esphome/components/rf_bridge/* @jesserockz esphome/components/rgbct/* @jesserockz esphome/components/ring_buffer/* @kahrendt +esphome/components/router/speaker/* @kahrendt esphome/components/rp2040/* @jesserockz esphome/components/rp2040_ble/* @bdraco esphome/components/rp2040_pio_led_strip/* @Papa-DMan diff --git a/esphome/components/router/__init__.py b/esphome/components/router/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/esphome/components/router/speaker/__init__.py b/esphome/components/router/speaker/__init__.py new file mode 100644 index 0000000000..2b2dc56433 --- /dev/null +++ b/esphome/components/router/speaker/__init__.py @@ -0,0 +1,123 @@ +from esphome import automation, core +import esphome.codegen as cg +from esphome.components import audio, speaker +import esphome.config_validation as cv +from 
esphome.const import ( + CONF_BITS_PER_SAMPLE, + CONF_ID, + CONF_NUM_CHANNELS, + CONF_OUTPUT_SPEAKER, + CONF_SAMPLE_RATE, +) +from esphome.core import ID +from esphome.cpp_generator import MockObj +from esphome.types import ConfigType, TemplateArgsType + +CODEOWNERS = ["@kahrendt"] + +CONF_OUTPUT_SPEAKERS = "output_speakers" +CONF_TARGET_SPEAKER = "target_speaker" + +router_ns = cg.esphome_ns.namespace("router") +Router = router_ns.class_("Router", cg.Component, speaker.Speaker) +SwitchOutputAction = router_ns.class_("SwitchOutputAction", automation.Action) + +SpeakerPtr = speaker.Speaker.operator("ptr") + + +def _set_stream_limits(config: ConfigType) -> ConfigType: + # Lock the router's stream limits to the user-declared format. Limits are set + # at CONFIG_SCHEMA time so they're visible to other components' FINAL_VALIDATE + # (which has no guaranteed ordering vs. ours). + audio.set_stream_limits( + min_bits_per_sample=config[CONF_BITS_PER_SAMPLE], + max_bits_per_sample=config[CONF_BITS_PER_SAMPLE], + min_channels=config[CONF_NUM_CHANNELS], + max_channels=config[CONF_NUM_CHANNELS], + min_sample_rate=config[CONF_SAMPLE_RATE], + max_sample_rate=config[CONF_SAMPLE_RATE], + )(config) + return config + + +CONFIG_SCHEMA = cv.All( + cv.Schema( + { + cv.GenerateID(): cv.declare_id(Router), + cv.Required(CONF_OUTPUT_SPEAKERS): cv.All( + cv.ensure_list(cv.use_id(speaker.Speaker)), + cv.Length(min=2, max=8), + ), + # All outputs must agree on a single format so the producer can keep + # streaming through a switch without reconfiguring. These are required + # rather than inherited because downstream components (e.g. mixer) + # read them from the router's declaration during FINAL_VALIDATE, + # which can't depend on our FINAL_VALIDATE running first. 
+ cv.Required(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32), + cv.Required(CONF_NUM_CHANNELS): cv.int_range(1, 2), + cv.Required(CONF_SAMPLE_RATE): cv.int_range(8000, 96000), + } + ).extend(cv.COMPONENT_SCHEMA), + cv.only_on_esp32, + _set_stream_limits, +) + + +def _final_validate(config: ConfigType) -> ConfigType: + # Validate every configured output speaker can accept the router's format. + # Switching to an output that can't reproduce the format the producer is + # already sending would otherwise fail silently at runtime. + for spk_id in config[CONF_OUTPUT_SPEAKERS]: + proxy = {**config, CONF_OUTPUT_SPEAKER: spk_id} + audio.final_validate_audio_schema( + "router", + audio_device=CONF_OUTPUT_SPEAKER, + bits_per_sample=config[CONF_BITS_PER_SAMPLE], + channels=config[CONF_NUM_CHANNELS], + sample_rate=config[CONF_SAMPLE_RATE], + )(proxy) + return config + + +FINAL_VALIDATE_SCHEMA = _final_validate + + +async def to_code(config: ConfigType) -> None: + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) + + # The first configured output is the default active output on boot. 
+ speakers = config[CONF_OUTPUT_SPEAKERS] + cg.add(var.set_output_count(len(speakers))) + for spk_id in speakers: + spk = await cg.get_variable(spk_id) + cg.add(var.add_output(spk)) + + +@automation.register_action( + "router.speaker.switch_output", + SwitchOutputAction, + cv.Schema( + { + cv.GenerateID(CONF_ID): cv.use_id(Router), + cv.Required(CONF_TARGET_SPEAKER): cv.templatable( + cv.use_id(speaker.Speaker) + ), + } + ), + synchronous=True, +) +async def switch_output_to_code( + config: ConfigType, + action_id: ID, + template_arg: cg.TemplateArguments, + args: TemplateArgsType, +) -> MockObj: + parent = await cg.get_variable(config[CONF_ID]) + var = cg.new_Pvariable(action_id, template_arg, parent) + target = config[CONF_TARGET_SPEAKER] + if not isinstance(target, core.Lambda): + target = await cg.get_variable(target) + template_ = await cg.templatable(target, args, SpeakerPtr) + cg.add(var.set_target(template_)) + return var diff --git a/esphome/components/router/speaker/router_speaker.cpp b/esphome/components/router/speaker/router_speaker.cpp new file mode 100644 index 0000000000..f4bf7420ab --- /dev/null +++ b/esphome/components/router/speaker/router_speaker.cpp @@ -0,0 +1,236 @@ +#include "router_speaker.h" + +#ifdef USE_ESP32 + +#include "esphome/core/log.h" + +#include "esp_timer.h" + +#include <algorithm> + +namespace esphome::router { + +static const char *const TAG = "router.speaker"; + +static inline uint32_t atomic_subtract_clamped(std::atomic<uint32_t> &var, uint32_t amount) { + uint32_t current = var.load(std::memory_order_acquire); + uint32_t subtracted = 0; + if (current > 0) { + uint32_t new_value; + do { + subtracted = std::min(amount, current); + new_value = current - subtracted; + } while (!var.compare_exchange_weak(current, new_value, std::memory_order_release, std::memory_order_acquire)); + } + return subtracted; +} + +void Router::setup() { + // Register a callback on every configured output. 
Each lambda captures its own + // index and only forwards when that output is the active one. This is required + // because CallbackManager has no remove() API. + for (size_t i = 0; i < this->outputs_.size(); i++) { + this->outputs_[i]->add_audio_output_callback([this, i](uint32_t frames, int64_t timestamp_us) { + // Always suppress the draining previous output during a switch, even if it's + // also the reselected active output (switching back to the bus holder). + // loop() fires one synthetic credit for its in-flight frames instead. + if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) == static_cast<int8_t>(i)) { + return; + } + if (this->active_output_idx_.load(std::memory_order_relaxed) != static_cast<int8_t>(i)) { + return; + } + atomic_subtract_clamped(this->frames_in_pipeline_, frames); + this->audio_output_callback_.call(frames, timestamp_us); + }); + } +} + +void Router::loop() { + speaker::Speaker *active = this->get_active_output(); + + // Mid-switch: the new output's start() is deferred until the previous output + // fully releases shared hardware (e.g. a single i2s_audio bus driving two + // speakers). Starting earlier produces "Parent bus is busy" retries. The + // synthetic-credit callback is also deferred until prev is fully stopped, so + // that once its task has drained no natural callbacks can race ours. + const int8_t pending_prev_idx = this->pending_start_prev_idx_.load(std::memory_order_relaxed); + if (pending_prev_idx >= 0) { + speaker::Speaker *prev = this->outputs_[pending_prev_idx]; + if (prev->is_stopped()) { + this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed); + + // Credit any frames left in prev's ring buffer / DMA so producer frame + // accounting (SpeakerSourceMediaPlayer pending_frames, sendspin/AEC + // clocks) clears cleanly. The leftover audio is intentionally dropped and + // the producer is told it played "now", giving a clean discontinuity that + // keeps frame accounting consistent across the switch. 
+ const uint32_t in_flight = this->frames_in_pipeline_.exchange(0, std::memory_order_acq_rel); + if (in_flight > 0) { + this->audio_output_callback_.call(in_flight, esp_timer_get_time()); + } + + this->apply_cached_state_to_active_(); + this->state_ = speaker::STATE_STARTING; + active->start(); + } + return; + } + + // Mirror the active output's running/stopped state into our own state_ so that + // is_running() / is_stopped() stay accurate from the producer's perspective. + // Also catch the active output self-stopping (e.g. i2s_audio silence timeout): + // without this, our state_ would stay RUNNING forever and the next play() would + // skip start(). The output retains its own volume/mute across a restart (and we + // forward those live regardless), but stream info arrives via the non-virtual + // set_audio_stream_info() and never reaches the output on its own; if the format + // changed while stopped, only start()'s apply_cached_state_to_active_() pushes it + // down before the output's play()-side auto-start locks in the stale format. + if (active->is_stopped()) { + this->state_ = speaker::STATE_STOPPED; + } else if (this->state_ == speaker::STATE_STARTING && active->is_running()) { + this->state_ = speaker::STATE_RUNNING; + } +} + +void Router::dump_config() { + ESP_LOGCONFIG(TAG, + "Router Speaker:\n" + " Outputs: %u", + static_cast<unsigned>(this->outputs_.size())); +} + +size_t Router::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) { + speaker::Speaker *active = this->get_active_output(); + + // Drop frames during a mid-switch until the old output releases shared hardware; + // forwarding now would trigger the new output's play()-side auto-start while + // the bus is still busy. + if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) >= 0) { + vTaskDelay(ticks_to_wait); + return 0; + } + + // Producers (e.g. mixer) set stream info on us and then drive play() from a + // task without ever calling our start(). 
i2s_audio's play() auto-starts the + // underlying driver, so we must push our cached stream info to the active + // output before that auto-start, or it locks to its default (16k mono). + if (this->state_ == speaker::STATE_STOPPED) { + this->start(); + vTaskDelay(ticks_to_wait); + ticks_to_wait = 0; + } + + size_t written = active->play(data, length, ticks_to_wait); + if (written > 0) { + const uint32_t frames = this->audio_stream_info_.bytes_to_frames(written); + this->frames_in_pipeline_.fetch_add(frames, std::memory_order_release); + } + return written; +} + +void Router::start() { + this->frames_in_pipeline_.store(0, std::memory_order_release); + this->apply_cached_state_to_active_(); + this->state_ = speaker::STATE_STARTING; + this->get_active_output()->start(); +} + +void Router::stop() { + // Cancel any pending mid-switch start; the producer wants us stopped. + this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed); + this->state_ = speaker::STATE_STOPPING; + this->get_active_output()->stop(); +} + +void Router::finish() { + this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed); + this->state_ = speaker::STATE_STOPPING; + this->get_active_output()->finish(); +} + +bool Router::has_buffered_data() const { return this->get_active_output()->has_buffered_data(); } + +void Router::set_pause_state(bool pause_state) { + this->cached_pause_ = pause_state; + this->get_active_output()->set_pause_state(pause_state); +} + +void Router::set_volume(float volume) { + this->volume_ = volume; + this->get_active_output()->set_volume(volume); +} + +void Router::set_mute_state(bool mute_state) { + this->mute_state_ = mute_state; + this->get_active_output()->set_mute_state(mute_state); +} + +bool Router::switch_to_output(speaker::Speaker *target) { + if (target == nullptr) { + return false; + } + + int8_t new_idx = -1; + for (size_t i = 0; i < this->outputs_.size(); i++) { + if (this->outputs_[i] == target) { + new_idx = static_cast<int8_t>(i); + break; + } + } 
+ if (new_idx < 0) { + ESP_LOGW(TAG, "Switch target is not a configured output"); + return false; + } + if (new_idx == this->active_output_idx_.load(std::memory_order_relaxed)) { + return true; + } + + // A switch is already in flight: pending_start_prev_idx_ is still releasing the + // shared bus and the current active output's start() is still deferred (it never + // started). Just redirect which output we start once the bus frees. Leave the bus + // holder (pending_start_prev_idx_), the in-flight frame counter (loop() still owes one + // synthetic credit for the bus holder's in-flight frames), and state_ alone, and + // don't stop the current active output, which never started. + if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) >= 0) { + this->active_output_idx_.store(new_idx, std::memory_order_relaxed); + return true; + } + + const bool was_active = (this->state_ == speaker::STATE_STARTING || this->state_ == speaker::STATE_RUNNING); + const int8_t old_idx = this->active_output_idx_.load(std::memory_order_relaxed); + + if (was_active) { + this->outputs_[old_idx]->stop(); + } + + this->active_output_idx_.store(new_idx, std::memory_order_relaxed); + + if (was_active) { + // Defer start and the synthetic-credit callback until the old output's + // task is fully stopped; loop() handles both. Firing the synthetic credit + // here would race the old task's still-in-flight natural callbacks, + // dispatching audio_output_callback_ concurrently from two threads, which + // some consumers (e.g. sendspin's progress sync) aren't reentrant-safe for. + // STATE_STOPPING keeps producers from observing a transient stopped state + // and lets our play() short-circuit so the new output's play() doesn't + // auto-start it while the shared bus is still being released. 
+ this->state_ = speaker::STATE_STOPPING; + this->pending_start_prev_idx_.store(old_idx, std::memory_order_relaxed); + } else { + this->frames_in_pipeline_.store(0, std::memory_order_release); + } + return true; +} + +void Router::apply_cached_state_to_active_() { + speaker::Speaker *active = this->get_active_output(); + active->set_audio_stream_info(this->audio_stream_info_); + active->set_volume(this->volume_); + active->set_mute_state(this->mute_state_); + active->set_pause_state(this->cached_pause_); +} + +} // namespace esphome::router + +#endif // USE_ESP32 diff --git a/esphome/components/router/speaker/router_speaker.h b/esphome/components/router/speaker/router_speaker.h new file mode 100644 index 0000000000..13b58a1c72 --- /dev/null +++ b/esphome/components/router/speaker/router_speaker.h @@ -0,0 +1,92 @@ +#pragma once + +#ifdef USE_ESP32 + +#include "esphome/components/speaker/speaker.h" +#include "esphome/core/automation.h" +#include "esphome/core/component.h" +#include "esphome/core/helpers.h" + +#include + +#include + +namespace esphome::router { + +class Router : public Component, public speaker::Speaker { + public: + float get_setup_priority() const override { return setup_priority::DATA; } + + void setup() override; + void loop() override; + void dump_config() override; + + size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); } + size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override; + + void start() override; + void stop() override; + void finish() override; + + bool has_buffered_data() const override; + + void set_pause_state(bool pause_state) override; + bool get_pause_state() const override { return this->cached_pause_; } + + void set_volume(float volume) override; + float get_volume() override { return this->volume_; } + + void set_mute_state(bool mute_state) override; + bool get_mute_state() override { return this->mute_state_; } + + // Allocate the output list to its final 
size. Must be called before add_output(). + void set_output_count(size_t count) { this->outputs_.init(count); } + void add_output(speaker::Speaker *spk) { this->outputs_.push_back(spk); } + + /// Switch the active output to the given speaker. Must be one of the configured outputs. + /// Returns false if `target` is not in the output list. + bool switch_to_output(speaker::Speaker *target); + + // Always valid: active_output_idx_ stays within [0, outputs_.size()) and at least + // two outputs are required (validated in Python), so this never returns null. + speaker::Speaker *get_active_output() const { + return this->outputs_[this->active_output_idx_.load(std::memory_order_relaxed)]; + } + + protected: + // Frames written to the active output but not yet played: incremented in play() and decremented + // (clamped at zero) by the active output's audio_output_callback. Mirrors mixer_speaker's + // frames_in_pipeline_. + std::atomic<uint32_t> frames_in_pipeline_{0}; + + bool cached_pause_{false}; + + void apply_cached_state_to_active_(); + + // Index of the previously-active output we're waiting on to fully stop before + // starting the new one. -1 means no pending start. Set by switch_to_output() + // when switching mid-playback; cleared by loop() once the old output reports + // is_stopped(). Required because shared-bus drivers (e.g. two i2s_audio + // speakers on one i2s_bus) reject start() until the previous user releases. + std::atomic<int8_t> pending_start_prev_idx_{-1}; + + private: + FixedVector<speaker::Speaker *> outputs_; + // Index into outputs_, always within [0, outputs_.size()). Defaults to the first + // configured output; updated by switch_to_output(). 
+ std::atomic<int8_t> active_output_idx_{0}; +}; + +template<typename... Ts> class SwitchOutputAction : public Action<Ts...> { + public: + explicit SwitchOutputAction(Router *parent) : parent_(parent) {} + TEMPLATABLE_VALUE(speaker::Speaker *, target) + void play(const Ts &...x) override { this->parent_->switch_to_output(this->target_.value(x...)); } + + protected: + Router *parent_; +}; + +} // namespace esphome::router + +#endif // USE_ESP32 diff --git a/tests/components/router/common.yaml b/tests/components/router/common.yaml new file mode 100644 index 0000000000..360c6daaee --- /dev/null +++ b/tests/components/router/common.yaml @@ -0,0 +1,44 @@ +esphome: + on_boot: + then: + - router.speaker.switch_output: + id: router_id + target_speaker: speaker_b_id + # id omitted: auto-resolved since there's a single router instance + - router.speaker.switch_output: + target_speaker: !lambda return id(speaker_a_id); + +i2s_audio: + - id: i2s_a + i2s_lrclk_pin: ${a_lrclk_pin} + i2s_bclk_pin: ${a_bclk_pin} + - id: i2s_b + +speaker: + - platform: i2s_audio + id: speaker_a_id + i2s_audio_id: i2s_a + dac_type: external + i2s_dout_pin: ${a_dout_pin} + sample_rate: 48000 + bits_per_sample: 16bit + channel: stereo + - platform: i2s_audio + id: speaker_b_id + i2s_audio_id: i2s_b + dac_type: external + i2s_dout_pin: ${b_dout_pin} + spdif_mode: true + use_apll: true + sample_rate: 48000 + bits_per_sample: 16bit + channel: stereo + i2s_mode: primary + - platform: router + id: router_id + output_speakers: + - speaker_a_id + - speaker_b_id + sample_rate: 48000 + bits_per_sample: 16 + num_channels: 2 diff --git a/tests/components/router/test.esp32-idf.yaml b/tests/components/router/test.esp32-idf.yaml new file mode 100644 index 0000000000..241a9a8903 --- /dev/null +++ b/tests/components/router/test.esp32-idf.yaml @@ -0,0 +1,7 @@ +substitutions: + a_lrclk_pin: GPIO4 + a_bclk_pin: GPIO5 + a_dout_pin: GPIO14 + b_dout_pin: GPIO19 + +<<: !include common.yaml