[router] Add a router speaker component to choose the output speaker at runtime (#16592)

2026-06-24 11:25:35 +00:00 · 2026-05-25 12:42:49 -04:00
parent 1c7ae96e42
commit 892e116680
7 changed files with 503 additions and 0 deletions
--- a/1
+++ b/1
@@ -417,6 +417,7 @@ esphome/components/restart/* @esphome/core
 esphome/components/rf_bridge/* @jesserockz
 esphome/components/rgbct/* @jesserockz
 esphome/components/ring_buffer/* @kahrendt
+esphome/components/router/speaker/* @kahrendt
 esphome/components/rp2040/* @jesserockz
 esphome/components/rp2040_ble/* @bdraco
 esphome/components/rp2040_pio_led_strip/* @Papa-DMan
--- a/esphome/components/router/init.py
+++ b/esphome/components/router/init.py
--- a/esphome/components/router/speaker/init.py
+++ b/esphome/components/router/speaker/init.py
@@ -0,0 +1,123 @@
+from esphome import automation, core
+import esphome.codegen as cg
+from esphome.components import audio, speaker
+import esphome.config_validation as cv
+from esphome.const import (
+    CONF_BITS_PER_SAMPLE,
+    CONF_ID,
+    CONF_NUM_CHANNELS,
+    CONF_OUTPUT_SPEAKER,
+    CONF_SAMPLE_RATE,
+)
+from esphome.core import ID
+from esphome.cpp_generator import MockObj
+from esphome.types import ConfigType, TemplateArgsType
+
+# Code owner handle for this platform.
+CODEOWNERS = ["@kahrendt"]
+
+# Configuration keys declared by this platform.
+CONF_OUTPUT_SPEAKERS = "output_speakers"
+CONF_TARGET_SPEAKER = "target_speaker"
+
+# Codegen handles for the C++ types in the `router` namespace.
+router_ns = cg.esphome_ns.namespace("router")
+Router = router_ns.class_("Router", cg.Component, speaker.Speaker)
+SwitchOutputAction = router_ns.class_("SwitchOutputAction", automation.Action)
+
+# Pointer-to-Speaker type, used as the output type of the templatable switch target.
+SpeakerPtr = speaker.Speaker.operator("ptr")
+
+
+def _set_stream_limits(config: ConfigType) -> ConfigType:
+    # Lock the router's stream limits to the user-declared format. Limits are set
+    # at CONFIG_SCHEMA time so they're visible to other components' FINAL_VALIDATE
+    # (which has no guaranteed ordering vs. ours).
+    audio.set_stream_limits(
+        min_bits_per_sample=config[CONF_BITS_PER_SAMPLE],
+        max_bits_per_sample=config[CONF_BITS_PER_SAMPLE],
+        min_channels=config[CONF_NUM_CHANNELS],
+        max_channels=config[CONF_NUM_CHANNELS],
+        min_sample_rate=config[CONF_SAMPLE_RATE],
+        max_sample_rate=config[CONF_SAMPLE_RATE],
+    )(config)
+    return config
+
+
+CONFIG_SCHEMA = cv.All(
+    cv.Schema(
+        {
+            cv.GenerateID(): cv.declare_id(Router),
+            cv.Required(CONF_OUTPUT_SPEAKERS): cv.All(
+                cv.ensure_list(cv.use_id(speaker.Speaker)),
+                cv.Length(min=2, max=8),
+            ),
+            # All outputs must agree on a single format so the producer can keep
+            # streaming through a switch without reconfiguring. These are required
+            # rather than inherited because downstream components (e.g. mixer)
+            # read them from the router's declaration during FINAL_VALIDATE,
+            # which can't depend on our FINAL_VALIDATE running first.
+            cv.Required(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32),
+            cv.Required(CONF_NUM_CHANNELS): cv.int_range(1, 2),
+            cv.Required(CONF_SAMPLE_RATE): cv.int_range(8000, 96000),
+        }
+    ).extend(cv.COMPONENT_SCHEMA),
+    cv.only_on_esp32,
+    _set_stream_limits,
+)
+
+
+def _final_validate(config: ConfigType) -> ConfigType:
+    # Validate every configured output speaker can accept the router's format.
+    # Switching to an output that can't reproduce the format the producer is
+    # already sending would otherwise fail silently at runtime.
+    for spk_id in config[CONF_OUTPUT_SPEAKERS]:
+        proxy = {**config, CONF_OUTPUT_SPEAKER: spk_id}
+        audio.final_validate_audio_schema(
+            "router",
+            audio_device=CONF_OUTPUT_SPEAKER,
+            bits_per_sample=config[CONF_BITS_PER_SAMPLE],
+            channels=config[CONF_NUM_CHANNELS],
+            sample_rate=config[CONF_SAMPLE_RATE],
+        )(proxy)
+    return config
+
+
+FINAL_VALIDATE_SCHEMA = _final_validate
+
+
+async def to_code(config: ConfigType) -> None:
+    var = cg.new_Pvariable(config[CONF_ID])
+    await cg.register_component(var, config)
+
+    # The first configured output is the default active output on boot.
+    speakers = config[CONF_OUTPUT_SPEAKERS]
+    cg.add(var.set_output_count(len(speakers)))
+    for spk_id in speakers:
+        spk = await cg.get_variable(spk_id)
+        cg.add(var.add_output(spk))
+
+
+@automation.register_action(
+    "router.speaker.switch_output",
+    SwitchOutputAction,
+    cv.Schema(
+        {
+            cv.GenerateID(CONF_ID): cv.use_id(Router),
+            cv.Required(CONF_TARGET_SPEAKER): cv.templatable(
+                cv.use_id(speaker.Speaker)
+            ),
+        }
+    ),
+    synchronous=True,
+)
+async def switch_output_to_code(
+    config: ConfigType,
+    action_id: ID,
+    template_arg: cg.TemplateArguments,
+    args: TemplateArgsType,
+) -> MockObj:
+    parent = await cg.get_variable(config[CONF_ID])
+    var = cg.new_Pvariable(action_id, template_arg, parent)
+    target = config[CONF_TARGET_SPEAKER]
+    if not isinstance(target, core.Lambda):
+        target = await cg.get_variable(target)
+    template_ = await cg.templatable(target, args, SpeakerPtr)
+    cg.add(var.set_target(template_))
+    return var
--- a/esphome/components/router/speaker/router_speaker.cpp
+++ b/esphome/components/router/speaker/router_speaker.cpp
@@ -0,0 +1,236 @@
+#include "router_speaker.h"
+
+#ifdef USE_ESP32
+
+#include "esphome/core/log.h"
+
+#include "esp_timer.h"
+
+#include <algorithm>
+
+namespace esphome::router {
+
+static const char *const TAG = "router.speaker";
+
+// Atomically subtracts up to `amount` from `var` without letting it drop below zero.
+// Returns the amount actually subtracted (0 if `var` was already 0 on the initial load).
+static inline uint32_t atomic_subtract_clamped(std::atomic<uint32_t> &var, uint32_t amount) {
+  uint32_t current = var.load(std::memory_order_acquire);
+  uint32_t subtracted = 0;
+  if (current > 0) {
+    uint32_t new_value;
+    // CAS retry loop: a failed compare_exchange_weak refreshes `current`, so the clamp is
+    // recomputed against the latest value before the next attempt.
+    do {
+      subtracted = std::min(amount, current);
+      new_value = current - subtracted;
+    } while (!var.compare_exchange_weak(current, new_value, std::memory_order_release, std::memory_order_acquire));
+  }
+  return subtracted;
+}
+
+// Hooks the audio-output callback of every configured output so that only callbacks from the
+// active output (and never from an output draining during a switch) reach our own listeners.
+void Router::setup() {
+  // Register a callback on every configured output. Each lambda captures its own
+  // index and only forwards when that output is the active one. This is required
+  // because CallbackManager has no remove() API.
+  for (size_t i = 0; i < this->outputs_.size(); i++) {
+    this->outputs_[i]->add_audio_output_callback([this, i](uint32_t frames, int64_t timestamp_us) {
+      // Always suppress the draining previous output during a switch, even if it's
+      // also the reselected active output (switching back to the bus holder).
+      // loop() fires one synthetic credit for its in-flight frames instead.
+      if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) == static_cast<int8_t>(i)) {
+        return;
+      }
+      if (this->active_output_idx_.load(std::memory_order_relaxed) != static_cast<int8_t>(i)) {
+        return;
+      }
+      // Played frames leave the pipeline count (clamped at zero) before being forwarded.
+      atomic_subtract_clamped(this->frames_in_pipeline_, frames);
+      this->audio_output_callback_.call(frames, timestamp_us);
+    });
+  }
+}
+
+// Component loop hook. While a switch is pending it waits for the previous output to report
+// stopped, then credits leftover frames and starts the new output. Otherwise it mirrors the
+// active output's stopped/running state into state_.
+void Router::loop() {
+  speaker::Speaker *active = this->get_active_output();
+
+  // Mid-switch: the new output's start() is deferred until the previous output
+  // fully releases shared hardware (e.g. a single i2s_audio bus driving two
+  // speakers). Starting earlier produces "Parent bus is busy" retries. The
+  // synthetic-credit callback is also deferred until prev is fully stopped, so
+  // that once its task has drained no natural callbacks can race ours.
+  const int8_t pending_prev_idx = this->pending_start_prev_idx_.load(std::memory_order_relaxed);
+  if (pending_prev_idx >= 0) {
+    speaker::Speaker *prev = this->outputs_[pending_prev_idx];
+    if (prev->is_stopped()) {
+      this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed);
+
+      // Credit any frames left in prev's ring buffer / DMA so producer frame
+      // accounting (SpeakerSourceMediaPlayer pending_frames, sendspin/AEC
+      // clocks) clears cleanly. The leftover audio is intentionally dropped and
+      // the producer is told it played "now", giving a clean discontinuity that
+      // keeps frame accounting consistent across the switch.
+      const uint32_t in_flight = this->frames_in_pipeline_.exchange(0, std::memory_order_acq_rel);
+      if (in_flight > 0) {
+        this->audio_output_callback_.call(in_flight, esp_timer_get_time());
+      }
+
+      // Push the cached format/volume/mute/pause to the new output before starting it.
+      this->apply_cached_state_to_active_();
+      this->state_ = speaker::STATE_STARTING;
+      active->start();
+    }
+    // Still waiting on prev (or just started the new output): skip state mirroring this pass.
+    return;
+  }
+
+  // Mirror the active output's running/stopped state into our own state_ so that
+  // is_running() / is_stopped() stay accurate from the producer's perspective.
+  // Also catch the active output self-stopping (e.g. i2s_audio silence timeout):
+  // without this, our state_ would stay RUNNING forever and the next play() would
+  // skip start(). The output retains its own volume/mute across a restart (and we
+  // forward those live regardless), but stream info arrives via the non-virtual
+  // set_audio_stream_info() and never reaches the output on its own; if the format
+  // changed while stopped, only start()'s apply_cached_state_to_active_() pushes it
+  // down before the output's play()-side auto-start locks in the stale format.
+  if (active->is_stopped()) {
+    this->state_ = speaker::STATE_STOPPED;
+  } else if (this->state_ == speaker::STATE_STARTING && active->is_running()) {
+    this->state_ = speaker::STATE_RUNNING;
+  }
+}
+
+// Logs the number of configured outputs.
+void Router::dump_config() {
+  const auto output_count = static_cast<unsigned>(this->outputs_.size());
+  ESP_LOGCONFIG(TAG,
+                "Router Speaker:\n"
+                "  Outputs: %u",
+                output_count);
+}
+
+// Forwards audio to the active output and returns the number of bytes it accepted. Returns 0
+// without forwarding while a switch is pending. Accepted bytes are converted to frames and added
+// to frames_in_pipeline_.
+size_t Router::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
+  speaker::Speaker *active = this->get_active_output();
+
+  // Drop frames during a mid-switch until the old output releases shared hardware;
+  // forwarding now would trigger the new output's play()-side auto-start while
+  // the bus is still busy.
+  if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) >= 0) {
+    // NOTE(review): sleeps for the caller's full timeout before reporting nothing written,
+    // presumably so the producer does not spin while the switch completes; confirm.
+    vTaskDelay(ticks_to_wait);
+    return 0;
+  }
+
+  // Producers (e.g. mixer) set stream info on us and then drive play() from a
+  // task without ever calling our start(). i2s_audio's play() auto-starts the
+  // underlying driver, so we must push our cached stream info to the active
+  // output before that auto-start, or it locks to its default (16k mono).
+  if (this->state_ == speaker::STATE_STOPPED) {
+    this->start();
+    // NOTE(review): the caller's whole timeout is slept here and the forwarded play() below then
+    // runs with a zero timeout, presumably to give the output time to come up; confirm intent.
+    vTaskDelay(ticks_to_wait);
+    ticks_to_wait = 0;
+  }
+
+  size_t written = active->play(data, length, ticks_to_wait);
+  if (written > 0) {
+    // Track what was handed to the output; the output's callback subtracts it again once played.
+    const uint32_t frames = this->audio_stream_info_.bytes_to_frames(written);
+    this->frames_in_pipeline_.fetch_add(frames, std::memory_order_release);
+  }
+  return written;
+}
+
+// Starts the active output: resets the in-flight frame count, pushes the cached stream info,
+// volume, mute and pause state down, marks the router as starting, then starts the output.
+void Router::start() {
+  this->frames_in_pipeline_.store(0, std::memory_order_release);
+  this->apply_cached_state_to_active_();
+  this->state_ = speaker::STATE_STARTING;
+  this->get_active_output()->start();
+}
+
+// Stops the active output and marks the router as stopping; loop() later mirrors the output's
+// stopped state into state_.
+void Router::stop() {
+  // Cancel any pending mid-switch start; the producer wants us stopped.
+  this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed);
+  this->state_ = speaker::STATE_STOPPING;
+  this->get_active_output()->stop();
+}
+
+// Same bookkeeping as stop(), but forwards finish() to the active output instead of stop().
+void Router::finish() {
+  // As in stop(), a pending mid-switch start is cancelled.
+  this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed);
+  this->state_ = speaker::STATE_STOPPING;
+  this->get_active_output()->finish();
+}
+
+// Reports whether the currently active output still holds buffered audio.
+bool Router::has_buffered_data() const {
+  const speaker::Speaker *active = this->get_active_output();
+  return active->has_buffered_data();
+}
+
+// Caches the pause state (re-applied to an output when it is started) and forwards it to the active output.
+void Router::set_pause_state(bool pause_state) {
+  speaker::Speaker *active = this->get_active_output();
+  this->cached_pause_ = pause_state;
+  active->set_pause_state(pause_state);
+}
+
+// Stores the volume on the router and forwards it to the active output.
+void Router::set_volume(float volume) {
+  speaker::Speaker *active = this->get_active_output();
+  this->volume_ = volume;
+  active->set_volume(volume);
+}
+
+// Stores the mute state on the router and forwards it to the active output.
+void Router::set_mute_state(bool mute_state) {
+  speaker::Speaker *active = this->get_active_output();
+  this->mute_state_ = mute_state;
+  active->set_mute_state(mute_state);
+}
+
+// Makes `target` the active output. Returns false for a null or unconfigured target and true
+// otherwise (including when `target` is already active). If the router was starting or running,
+// the old output is stopped and the new output's start is deferred to loop().
+bool Router::switch_to_output(speaker::Speaker *target) {
+  if (target == nullptr) {
+    return false;
+  }
+
+  // Resolve the target pointer to its index in the configured output list.
+  int8_t new_idx = -1;
+  for (size_t i = 0; i < this->outputs_.size(); i++) {
+    if (this->outputs_[i] == target) {
+      new_idx = static_cast<int8_t>(i);
+      break;
+    }
+  }
+  if (new_idx < 0) {
+    ESP_LOGW(TAG, "Switch target is not a configured output");
+    return false;
+  }
+  // Already the active output: nothing to do.
+  if (new_idx == this->active_output_idx_.load(std::memory_order_relaxed)) {
+    return true;
+  }
+
+  // A switch is already in flight: pending_start_prev_idx_ is still releasing the
+  // shared bus and the current active output's start() is still deferred (it never
+  // started). Just redirect which output we start once the bus frees. Leave the bus
+  // holder (pending_start_prev_idx_), the in-flight frame counter (loop() still owes one
+  // synthetic credit for the bus holder's in-flight frames), and state_ alone, and
+  // don't stop the current active output, which never started.
+  if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) >= 0) {
+    this->active_output_idx_.store(new_idx, std::memory_order_relaxed);
+    return true;
+  }
+
+  const bool was_active = (this->state_ == speaker::STATE_STARTING || this->state_ == speaker::STATE_RUNNING);
+  const int8_t old_idx = this->active_output_idx_.load(std::memory_order_relaxed);
+
+  // Ask the old output to stop before the active index moves away from it.
+  if (was_active) {
+    this->outputs_[old_idx]->stop();
+  }
+
+  this->active_output_idx_.store(new_idx, std::memory_order_relaxed);
+
+  if (was_active) {
+    // Defer start and the synthetic-credit callback until the old output's
+    // task is fully stopped; loop() handles both. Firing the synthetic credit
+    // here would race the old task's still-in-flight natural callbacks,
+    // dispatching audio_output_callback_ concurrently from two threads, which
+    // some consumers (e.g. sendspin's progress sync) aren't reentrant-safe for.
+    // STATE_STOPPING keeps producers from observing a transient stopped state
+    // and lets our play() short-circuit so the new output's play() doesn't
+    // auto-start it while the shared bus is still being released.
+    this->state_ = speaker::STATE_STOPPING;
+    this->pending_start_prev_idx_.store(old_idx, std::memory_order_relaxed);
+  } else {
+    // Router was not starting/running: no deferred start is needed, only the counter is reset.
+    this->frames_in_pipeline_.store(0, std::memory_order_release);
+  }
+  return true;
+}
+
+// Pushes the router's stored stream info, volume, mute and pause state to the active output.
+// Called from start() and from loop() right before a deferred start.
+void Router::apply_cached_state_to_active_() {
+  speaker::Speaker *active = this->get_active_output();
+  active->set_audio_stream_info(this->audio_stream_info_);
+  active->set_volume(this->volume_);
+  active->set_mute_state(this->mute_state_);
+  active->set_pause_state(this->cached_pause_);
+}
+
+}  // namespace esphome::router
+
+#endif  // USE_ESP32
--- a/esphome/components/router/speaker/router_speaker.h
+++ b/esphome/components/router/speaker/router_speaker.h
@@ -0,0 +1,92 @@
+#pragma once
+
+#ifdef USE_ESP32
+
+#include "esphome/components/speaker/speaker.h"
+#include "esphome/core/automation.h"
+#include "esphome/core/component.h"
+#include "esphome/core/helpers.h"
+
+#include <freertos/FreeRTOS.h>
+
+#include <atomic>
+
+namespace esphome::router {
+
+/// A speaker that forwards audio and control calls to one of several configured output speakers.
+/// The active output can be changed at runtime with switch_to_output().
+class Router : public Component, public speaker::Speaker {
+ public:
+  float get_setup_priority() const override { return setup_priority::DATA; }
+
+  void setup() override;
+  void loop() override;
+  void dump_config() override;
+
+  // The two-argument overload forwards with a zero timeout.
+  size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); }
+  size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;
+
+  void start() override;
+  void stop() override;
+  void finish() override;
+
+  bool has_buffered_data() const override;
+
+  // Getters return the router's own stored values; setters also forward to the active output.
+  void set_pause_state(bool pause_state) override;
+  bool get_pause_state() const override { return this->cached_pause_; }
+
+  void set_volume(float volume) override;
+  float get_volume() override { return this->volume_; }
+
+  void set_mute_state(bool mute_state) override;
+  bool get_mute_state() override { return this->mute_state_; }
+
+  // Allocate the output list to its final size. Must be called before add_output().
+  void set_output_count(size_t count) { this->outputs_.init(count); }
+  void add_output(speaker::Speaker *spk) { this->outputs_.push_back(spk); }
+
+  /// Switch the active output to the given speaker. Must be one of the configured outputs.
+  /// Returns false if `target` is not in the output list.
+  bool switch_to_output(speaker::Speaker *target);
+
+  // Always valid: active_output_idx_ stays within [0, outputs_.size()) and at least
+  // two outputs are required (validated in Python), so this never returns null.
+  speaker::Speaker *get_active_output() const {
+    return this->outputs_[this->active_output_idx_.load(std::memory_order_relaxed)];
+  }
+
+ protected:
+  // Frames written to the active output but not yet played: incremented in play() and decremented
+  // (clamped at zero) by the active output's audio_output_callback. Mirrors mixer_speaker's
+  // frames_in_pipeline_.
+  std::atomic<uint32_t> frames_in_pipeline_{0};
+
+  // Last pause state set on the router; re-applied by apply_cached_state_to_active_().
+  bool cached_pause_{false};
+
+  // Pushes stream info, volume, mute and pause state to the active output.
+  void apply_cached_state_to_active_();
+
+  // Index of the previously-active output we're waiting on to fully stop before
+  // starting the new one. -1 means no pending start. Set by switch_to_output()
+  // when switching mid-playback; cleared by loop() once the old output reports
+  // is_stopped(). Required because shared-bus drivers (e.g. two i2s_audio
+  // speakers on one i2s_bus) reject start() until the previous user releases.
+  std::atomic<int8_t> pending_start_prev_idx_{-1};
+
+ private:
+  // Configured outputs, in the order they were added.
+  FixedVector<speaker::Speaker *> outputs_;
+  // Index into outputs_, always within [0, outputs_.size()). Defaults to the first
+  // configured output; updated by switch_to_output().
+  std::atomic<int8_t> active_output_idx_{0};
+};
+
+/// Automation action that switches its parent router to the (templatable) target speaker.
+template<typename... Ts> class SwitchOutputAction : public Action<Ts...> {
+ public:
+  explicit SwitchOutputAction(Router *parent) : parent_(parent) {}
+  TEMPLATABLE_VALUE(speaker::Speaker *, target)
+  // Evaluates the target with the trigger arguments; switch_to_output()'s result is not checked.
+  void play(const Ts &...x) override { this->parent_->switch_to_output(this->target_.value(x...)); }
+
+ protected:
+  Router *parent_;
+};
+
+}  // namespace esphome::router
+
+#endif  // USE_ESP32
--- a/tests/components/router/common.yaml
+++ b/tests/components/router/common.yaml
@@ -0,0 +1,44 @@
+# Test configuration for the router speaker: two i2s_audio speakers behind one router.
+esphome:
+  on_boot:
+    then:
+      # Explicit router id with a static speaker id as the target
+      - router.speaker.switch_output:
+          id: router_id
+          target_speaker: speaker_b_id
+      # id omitted: auto-resolved since there's a single router instance
+      - router.speaker.switch_output:
+          target_speaker: !lambda return id(speaker_a_id);
+
+i2s_audio:
+  - id: i2s_a
+    i2s_lrclk_pin: ${a_lrclk_pin}
+    i2s_bclk_pin: ${a_bclk_pin}
+  # Second bus declares no clock pins; it is used by the spdif_mode speaker below
+  - id: i2s_b
+
+speaker:
+  - platform: i2s_audio
+    id: speaker_a_id
+    i2s_audio_id: i2s_a
+    dac_type: external
+    i2s_dout_pin: ${a_dout_pin}
+    sample_rate: 48000
+    bits_per_sample: 16bit
+    channel: stereo
+  - platform: i2s_audio
+    id: speaker_b_id
+    i2s_audio_id: i2s_b
+    dac_type: external
+    i2s_dout_pin: ${b_dout_pin}
+    spdif_mode: true
+    use_apll: true
+    sample_rate: 48000
+    bits_per_sample: 16bit
+    channel: stereo
+    i2s_mode: primary
+  # Router format matches the 48 kHz / 16-bit / stereo settings of both outputs above
+  - platform: router
+    id: router_id
+    output_speakers:
+      - speaker_a_id
+      - speaker_b_id
+    sample_rate: 48000
+    bits_per_sample: 16
+    num_channels: 2
--- a/tests/components/router/test.esp32-idf.yaml
+++ b/tests/components/router/test.esp32-idf.yaml
@@ -0,0 +1,7 @@
+# Pin substitutions consumed by common.yaml for the esp32-idf test target.
+substitutions:
+  a_lrclk_pin: GPIO4
+  a_bclk_pin: GPIO5
+  a_dout_pin: GPIO14
+  b_dout_pin: GPIO19
+
+<<: !include common.yaml