[router] Add a router speaker component to choose the output speaker at runtime (#16592)

This commit is contained in:
Kevin Ahrendt
2026-05-25 12:42:49 -04:00
committed by GitHub
parent 1c7ae96e42
commit 892e116680
7 changed files with 503 additions and 0 deletions

View File

@@ -417,6 +417,7 @@ esphome/components/restart/* @esphome/core
esphome/components/rf_bridge/* @jesserockz
esphome/components/rgbct/* @jesserockz
esphome/components/ring_buffer/* @kahrendt
esphome/components/router/speaker/* @kahrendt
esphome/components/rp2040/* @jesserockz
esphome/components/rp2040_ble/* @bdraco
esphome/components/rp2040_pio_led_strip/* @Papa-DMan

View File

View File

@@ -0,0 +1,123 @@
from esphome import automation, core
import esphome.codegen as cg
from esphome.components import audio, speaker
import esphome.config_validation as cv
from esphome.const import (
CONF_BITS_PER_SAMPLE,
CONF_ID,
CONF_NUM_CHANNELS,
CONF_OUTPUT_SPEAKER,
CONF_SAMPLE_RATE,
)
from esphome.core import ID
from esphome.cpp_generator import MockObj
from esphome.types import ConfigType, TemplateArgsType
# Maintainer handle(s) for this component.
CODEOWNERS = ["@kahrendt"]

# Configuration keys introduced by this component: the list of candidate outputs
# on the router itself, and the target of the switch_output action.
CONF_OUTPUT_SPEAKERS = "output_speakers"
CONF_TARGET_SPEAKER = "target_speaker"

# Code-generation handles for the C++ symbols in the `router` namespace.
router_ns = cg.esphome_ns.namespace("router")
Router = router_ns.class_("Router", cg.Component, speaker.Speaker)
SwitchOutputAction = router_ns.class_("SwitchOutputAction", automation.Action)

# Pointer-to-Speaker type; used as the output type when templating the
# switch_output action's target.
SpeakerPtr = speaker.Speaker.operator("ptr")
def _set_stream_limits(config: ConfigType) -> ConfigType:
    """Pin the router's audio stream limits to the exact declared format.

    Each minimum/maximum pair is set to the same configured value, so only the
    declared bit depth, channel count and sample rate are accepted. This runs
    as part of CONFIG_SCHEMA so the limits are already in place when other
    components' FINAL_VALIDATE runs (its ordering relative to ours is not
    guaranteed).

    Args:
        config: The router speaker configuration being validated.

    Returns:
        The same ``config`` object that was passed in.
    """
    bits = config[CONF_BITS_PER_SAMPLE]
    channels = config[CONF_NUM_CHANNELS]
    rate = config[CONF_SAMPLE_RATE]

    register_limits = audio.set_stream_limits(
        min_bits_per_sample=bits,
        max_bits_per_sample=bits,
        min_channels=channels,
        max_channels=channels,
        min_sample_rate=rate,
        max_sample_rate=rate,
    )
    register_limits(config)
    return config
# Validation pipeline for a `router` speaker entry. The validators run in the
# order listed: the option schema, then the ESP32-only check, then
# _set_stream_limits to publish the declared format.
CONFIG_SCHEMA = cv.All(
    cv.Schema(
        {
            cv.GenerateID(): cv.declare_id(Router),
            # Between 2 and 8 existing speakers that the router can switch between.
            cv.Required(CONF_OUTPUT_SPEAKERS): cv.All(
                cv.ensure_list(cv.use_id(speaker.Speaker)),
                cv.Length(min=2, max=8),
            ),
            # All outputs must agree on a single format so the producer can keep
            # streaming through a switch without reconfiguring. These are required
            # rather than inherited because downstream components (e.g. mixer)
            # read them from the router's declaration during FINAL_VALIDATE,
            # which can't depend on our FINAL_VALIDATE running first.
            cv.Required(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32),
            cv.Required(CONF_NUM_CHANNELS): cv.int_range(1, 2),
            cv.Required(CONF_SAMPLE_RATE): cv.int_range(8000, 96000),
        }
    ).extend(cv.COMPONENT_SCHEMA),
    cv.only_on_esp32,
    _set_stream_limits,
)
def _final_validate(config: ConfigType) -> ConfigType:
    """Check that every configured output speaker accepts the router's format.

    Each output is validated in turn by presenting it to the audio schema
    validator as if it were the router's single ``output_speaker``. Without
    this, switching to an output that can't reproduce the format the producer
    is already sending would fail silently at runtime.

    Args:
        config: The validated router speaker configuration.

    Returns:
        The unmodified ``config``.
    """
    stream_format = {
        "bits_per_sample": config[CONF_BITS_PER_SAMPLE],
        "channels": config[CONF_NUM_CHANNELS],
        "sample_rate": config[CONF_SAMPLE_RATE],
    }
    for output_id in config[CONF_OUTPUT_SPEAKERS]:
        # Copy of the config with this one output substituted under the
        # single-speaker key the audio validator looks up.
        candidate = {**config, CONF_OUTPUT_SPEAKER: output_id}
        check_output = audio.final_validate_audio_schema(
            "router",
            audio_device=CONF_OUTPUT_SPEAKER,
            **stream_format,
        )
        check_output(candidate)
    return config


FINAL_VALIDATE_SCHEMA = _final_validate
async def to_code(config: ConfigType) -> None:
    """Generate the C++ that creates the router and registers its outputs.

    Args:
        config: The validated router speaker configuration.
    """
    router = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(router, config)

    output_ids = config[CONF_OUTPUT_SPEAKERS]
    # The output list is sized up front, before any output is added.
    cg.add(router.set_output_count(len(output_ids)))
    # Outputs are added in configuration order; the first configured output is
    # the default active output on boot.
    for output_id in output_ids:
        output = await cg.get_variable(output_id)
        cg.add(router.add_output(output))
@automation.register_action(
    "router.speaker.switch_output",
    SwitchOutputAction,
    cv.Schema(
        {
            cv.GenerateID(CONF_ID): cv.use_id(Router),
            cv.Required(CONF_TARGET_SPEAKER): cv.templatable(
                cv.use_id(speaker.Speaker)
            ),
        }
    ),
    synchronous=True,
)
async def switch_output_to_code(
    config: ConfigType,
    action_id: ID,
    template_arg: cg.TemplateArguments,
    args: TemplateArgsType,
) -> MockObj:
    """Generate the C++ for the ``router.speaker.switch_output`` action.

    Args:
        config: The validated action configuration.
        action_id: ID under which the action object is created.
        template_arg: Template arguments for the generated action.
        args: Argument list made available to a templated target.

    Returns:
        The generated action object.
    """
    router = await cg.get_variable(config[CONF_ID])
    action = cg.new_Pvariable(action_id, template_arg, router)

    # A lambda target is templated as-is; a plain ID is first resolved to the
    # speaker variable it names.
    target = config[CONF_TARGET_SPEAKER]
    if isinstance(target, core.Lambda):
        resolved = target
    else:
        resolved = await cg.get_variable(target)

    templated_target = await cg.templatable(resolved, args, SpeakerPtr)
    cg.add(action.set_target(templated_target))
    return action

View File

@@ -0,0 +1,236 @@
#include "router_speaker.h"
#ifdef USE_ESP32
#include "esphome/core/log.h"
#include "esp_timer.h"
#include <algorithm>
namespace esphome::router {
// Tag passed to the ESP_LOG* macros used in this file.
static const char *const TAG = "router.speaker";
// Atomically subtract up to `amount` from `var` without letting it drop below zero.
// Returns the amount that was actually subtracted (0 if `var` was already zero).
static inline uint32_t atomic_subtract_clamped(std::atomic<uint32_t> &var, uint32_t amount) {
  uint32_t observed = var.load(std::memory_order_acquire);
  if (observed == 0) {
    return 0;
  }
  while (true) {
    // Never take more than what is currently stored.
    const uint32_t taken = std::min(amount, observed);
    const uint32_t remaining = observed - taken;
    // On failure `observed` is refreshed with the current value and the amounts are recomputed.
    if (var.compare_exchange_weak(observed, remaining, std::memory_order_release, std::memory_order_acquire)) {
      return taken;
    }
  }
}
// Hook into every configured output's audio-output callback so that played-frame
// notifications from the active output are relayed to this router's own
// audio_output_callback_ listeners.
void Router::setup() {
  // Register a callback on every configured output. Each lambda captures its own
  // index and only forwards when that output is the active one. This is required
  // because CallbackManager has no remove() API.
  for (size_t i = 0; i < this->outputs_.size(); i++) {
    this->outputs_[i]->add_audio_output_callback([this, i](uint32_t frames, int64_t timestamp_us) {
      // Always suppress the draining previous output during a switch, even if it's
      // also the reselected active output (switching back to the bus holder).
      // loop() fires one synthetic credit for its in-flight frames instead.
      if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) == static_cast<int8_t>(i)) {
        return;
      }
      // Ignore notifications from outputs that are not currently selected.
      if (this->active_output_idx_.load(std::memory_order_relaxed) != static_cast<int8_t>(i)) {
        return;
      }
      // These frames have been played: shrink the in-flight count (clamped at zero),
      // then pass the notification on to our own listeners.
      atomic_subtract_clamped(this->frames_in_pipeline_, frames);
      this->audio_output_callback_.call(frames, timestamp_us);
    });
  }
}
// Periodic housekeeping. While a switch is pending, waits for the previous output to
// report stopped and then credits its leftover frames and starts the new output.
// Otherwise mirrors the active output's stopped/running state into state_.
void Router::loop() {
  speaker::Speaker *active = this->get_active_output();
  // Mid-switch: the new output's start() is deferred until the previous output
  // fully releases shared hardware (e.g. a single i2s_audio bus driving two
  // speakers). Starting earlier produces "Parent bus is busy" retries. The
  // synthetic-credit callback is also deferred until prev is fully stopped, so
  // that once its task has drained no natural callbacks can race ours.
  const int8_t pending_prev_idx = this->pending_start_prev_idx_.load(std::memory_order_relaxed);
  if (pending_prev_idx >= 0) {
    speaker::Speaker *prev = this->outputs_[pending_prev_idx];
    if (prev->is_stopped()) {
      // Clear the pending marker first; this also re-enables callback forwarding
      // for that output index in the lambda registered by setup().
      this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed);
      // Credit any frames left in prev's ring buffer / DMA so producer frame
      // accounting (SpeakerSourceMediaPlayer pending_frames, sendspin/AEC
      // clocks) clears cleanly. The leftover audio is intentionally dropped and
      // the producer is told it played "now", giving a clean discontinuity that
      // keeps frame accounting consistent across the switch.
      const uint32_t in_flight = this->frames_in_pipeline_.exchange(0, std::memory_order_acq_rel);
      if (in_flight > 0) {
        this->audio_output_callback_.call(in_flight, esp_timer_get_time());
      }
      // Push the cached stream info / volume / mute / pause to the new output, then start it.
      this->apply_cached_state_to_active_();
      this->state_ = speaker::STATE_STARTING;
      active->start();
    }
    // While a switch is pending (or was just completed), skip the state mirroring below.
    return;
  }
  // Mirror the active output's running/stopped state into our own state_ so that
  // is_running() / is_stopped() stay accurate from the producer's perspective.
  // Also catch the active output self-stopping (e.g. i2s_audio silence timeout):
  // without this, our state_ would stay RUNNING forever and the next play() would
  // skip start(). The output retains its own volume/mute across a restart (and we
  // forward those live regardless), but stream info arrives via the non-virtual
  // set_audio_stream_info() and never reaches the output on its own; if the format
  // changed while stopped, only start()'s apply_cached_state_to_active_() pushes it
  // down before the output's play()-side auto-start locks in the stale format.
  if (active->is_stopped()) {
    this->state_ = speaker::STATE_STOPPED;
  } else if (this->state_ == speaker::STATE_STARTING && active->is_running()) {
    this->state_ = speaker::STATE_RUNNING;
  }
}
// Log the router's configuration: the number of configured outputs.
void Router::dump_config() {
  const auto output_count = static_cast<unsigned>(this->outputs_.size());
  ESP_LOGCONFIG(TAG,
                "Router Speaker:\n"
                " Outputs: %u",
                output_count);
}
// Forward audio data to the active output.
//   data / length:  buffer to play and its size
//   ticks_to_wait:  passed on to the active output's play(); also used as the delay
//                   when data is dropped mid-switch or when the router had to start itself
// Returns the value reported by the active output's play(), or 0 when the data was
// dropped because a switch is still pending.
size_t Router::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
  speaker::Speaker *active = this->get_active_output();
  // Drop frames during a mid-switch until the old output releases shared hardware;
  // forwarding now would trigger the new output's play()-side auto-start while
  // the bus is still busy.
  if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) >= 0) {
    vTaskDelay(ticks_to_wait);
    return 0;
  }
  // Producers (e.g. mixer) set stream info on us and then drive play() from a
  // task without ever calling our start(). i2s_audio's play() auto-starts the
  // underlying driver, so we must push our cached stream info to the active
  // output before that auto-start, or it locks to its default (16k mono).
  if (this->state_ == speaker::STATE_STOPPED) {
    this->start();
    // The wait is spent here, so the forwarded play() below is called with no further wait.
    vTaskDelay(ticks_to_wait);
    ticks_to_wait = 0;
  }
  size_t written = active->play(data, length, ticks_to_wait);
  if (written > 0) {
    // Track accepted frames as in-flight until the output's callback reports them played.
    const uint32_t frames = this->audio_stream_info_.bytes_to_frames(written);
    this->frames_in_pipeline_.fetch_add(frames, std::memory_order_release);
  }
  return written;
}
void Router::start() {
this->frames_in_pipeline_.store(0, std::memory_order_release);
this->apply_cached_state_to_active_();
this->state_ = speaker::STATE_STARTING;
this->get_active_output()->start();
}
void Router::stop() {
// Cancel any pending mid-switch start; the producer wants us stopped.
this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed);
this->state_ = speaker::STATE_STOPPING;
this->get_active_output()->stop();
}
void Router::finish() {
this->pending_start_prev_idx_.store(-1, std::memory_order_relaxed);
this->state_ = speaker::STATE_STOPPING;
this->get_active_output()->finish();
}
bool Router::has_buffered_data() const { return this->get_active_output()->has_buffered_data(); }
void Router::set_pause_state(bool pause_state) {
this->cached_pause_ = pause_state;
this->get_active_output()->set_pause_state(pause_state);
}
void Router::set_volume(float volume) {
this->volume_ = volume;
this->get_active_output()->set_volume(volume);
}
void Router::set_mute_state(bool mute_state) {
this->mute_state_ = mute_state;
this->get_active_output()->set_mute_state(mute_state);
}
// Make `target` the active output.
// Returns false if `target` is null or is not one of the configured outputs; returns
// true otherwise (including when `target` is already active). When the router is
// starting or running, the old output is stopped here and the new output's start is
// deferred to loop().
bool Router::switch_to_output(speaker::Speaker *target) {
  if (target == nullptr) {
    return false;
  }
  // Look up the target's index in the configured output list.
  int8_t new_idx = -1;
  for (size_t i = 0; i < this->outputs_.size(); i++) {
    if (this->outputs_[i] == target) {
      new_idx = static_cast<int8_t>(i);
      break;
    }
  }
  if (new_idx < 0) {
    ESP_LOGW(TAG, "Switch target is not a configured output");
    return false;
  }
  // Already the active output: nothing to do.
  if (new_idx == this->active_output_idx_.load(std::memory_order_relaxed)) {
    return true;
  }
  // A switch is already in flight: pending_start_prev_idx_ is still releasing the
  // shared bus and the current active output's start() is still deferred (it never
  // started). Just redirect which output we start once the bus frees. Leave the bus
  // holder (pending_start_prev_idx_), the in-flight frame counter (loop() still owes one
  // synthetic credit for the bus holder's in-flight frames), and state_ alone, and
  // don't stop the current active output, which never started.
  if (this->pending_start_prev_idx_.load(std::memory_order_relaxed) >= 0) {
    this->active_output_idx_.store(new_idx, std::memory_order_relaxed);
    return true;
  }
  const bool was_active = (this->state_ == speaker::STATE_STARTING || this->state_ == speaker::STATE_RUNNING);
  const int8_t old_idx = this->active_output_idx_.load(std::memory_order_relaxed);
  // Stop the old output before the active index is changed.
  if (was_active) {
    this->outputs_[old_idx]->stop();
  }
  this->active_output_idx_.store(new_idx, std::memory_order_relaxed);
  if (was_active) {
    // Defer start and the synthetic-credit callback until the old output's
    // task is fully stopped; loop() handles both. Firing the synthetic credit
    // here would race the old task's still-in-flight natural callbacks,
    // dispatching audio_output_callback_ concurrently from two threads, which
    // some consumers (e.g. sendspin's progress sync) aren't reentrant-safe for.
    // STATE_STOPPING keeps producers from observing a transient stopped state
    // and lets our play() short-circuit so the new output's play() doesn't
    // auto-start it while the shared bus is still being released.
    this->state_ = speaker::STATE_STOPPING;
    this->pending_start_prev_idx_.store(old_idx, std::memory_order_relaxed);
  } else {
    // Router was not starting or running: just clear the in-flight frame count.
    this->frames_in_pipeline_.store(0, std::memory_order_release);
  }
  return true;
}
void Router::apply_cached_state_to_active_() {
speaker::Speaker *active = this->get_active_output();
active->set_audio_stream_info(this->audio_stream_info_);
active->set_volume(this->volume_);
active->set_mute_state(this->mute_state_);
active->set_pause_state(this->cached_pause_);
}
} // namespace esphome::router
#endif // USE_ESP32

View File

@@ -0,0 +1,92 @@
#pragma once
#ifdef USE_ESP32
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/helpers.h"
#include <freertos/FreeRTOS.h>
#include <atomic>
namespace esphome::router {
/// A speaker that forwards audio and control calls to one of several configured
/// output speakers, with the active output selectable at runtime through
/// switch_to_output().
class Router : public Component, public speaker::Speaker {
 public:
  float get_setup_priority() const override { return setup_priority::DATA; }
  void setup() override;
  void loop() override;
  void dump_config() override;

  // Two-argument overload: forwards to the three-argument play() with ticks_to_wait of 0.
  size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); }
  size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;

  void start() override;
  void stop() override;
  void finish() override;

  bool has_buffered_data() const override;

  // Setters cache the value on the router and forward it to the active output;
  // getters return the router's cached value.
  void set_pause_state(bool pause_state) override;
  bool get_pause_state() const override { return this->cached_pause_; }

  void set_volume(float volume) override;
  float get_volume() override { return this->volume_; }

  void set_mute_state(bool mute_state) override;
  bool get_mute_state() override { return this->mute_state_; }

  // Allocate the output list to its final size. Must be called before add_output().
  void set_output_count(size_t count) { this->outputs_.init(count); }
  // Append a speaker to the output list.
  void add_output(speaker::Speaker *spk) { this->outputs_.push_back(spk); }

  /// Switch the active output to the given speaker. Must be one of the configured outputs.
  /// Returns false if `target` is not in the output list.
  bool switch_to_output(speaker::Speaker *target);

  // Always valid: active_output_idx_ stays within [0, outputs_.size()) and at least
  // two outputs are required (validated in Python), so this never returns null.
  speaker::Speaker *get_active_output() const {
    return this->outputs_[this->active_output_idx_.load(std::memory_order_relaxed)];
  }

 protected:
  // Frames written to the active output but not yet played: incremented in play() and decremented
  // (clamped at zero) by the active output's audio_output_callback. Mirrors mixer_speaker's
  // frames_in_pipeline_.
  std::atomic<uint32_t> frames_in_pipeline_{0};

  // Last pause state requested through set_pause_state(); re-applied to an output when it is started.
  bool cached_pause_{false};

  // Pushes the cached stream info, volume, mute and pause state to the active output.
  void apply_cached_state_to_active_();

  // Index of the previously-active output we're waiting on to fully stop before
  // starting the new one. -1 means no pending start. Set by switch_to_output()
  // when switching mid-playback; cleared by loop() once the old output reports
  // is_stopped(). Required because shared-bus drivers (e.g. two i2s_audio
  // speakers on one i2s_bus) reject start() until the previous user releases.
  std::atomic<int8_t> pending_start_prev_idx_{-1};

 private:
  // Configured output speakers, in the order they were added.
  FixedVector<speaker::Speaker *> outputs_;

  // Index into outputs_, always within [0, outputs_.size()). Defaults to the first
  // configured output; updated by switch_to_output().
  std::atomic<int8_t> active_output_idx_{0};
};
/// Automation action that asks a Router to switch its active output to the
/// (templatable) target speaker.
template<typename... Ts> class SwitchOutputAction : public Action<Ts...> {
 public:
  explicit SwitchOutputAction(Router *parent) : parent_(parent) {}

  TEMPLATABLE_VALUE(speaker::Speaker *, target)

  // Evaluate the target for the trigger arguments and request the switch.
  // The result of switch_to_output() is not used.
  void play(const Ts &...x) override {
    speaker::Speaker *requested = this->target_.value(x...);
    this->parent_->switch_to_output(requested);
  }

 protected:
  // Router whose active output this action changes.
  Router *parent_;
};
} // namespace esphome::router
#endif // USE_ESP32

View File

@@ -0,0 +1,44 @@
esphome:
  on_boot:
    then:
      # Explicit router id with a statically named target speaker
      - router.speaker.switch_output:
          id: router_id
          target_speaker: speaker_b_id
      # id omitted: auto-resolved since there's a single router instance
      - router.speaker.switch_output:
          target_speaker: !lambda return id(speaker_a_id);

# Two separate I2S buses, one per output speaker
i2s_audio:
  - id: i2s_a
    i2s_lrclk_pin: ${a_lrclk_pin}
    i2s_bclk_pin: ${a_bclk_pin}
  - id: i2s_b

speaker:
  # Output A: external DAC on bus i2s_a
  - platform: i2s_audio
    id: speaker_a_id
    i2s_audio_id: i2s_a
    dac_type: external
    i2s_dout_pin: ${a_dout_pin}
    sample_rate: 48000
    bits_per_sample: 16bit
    channel: stereo
  # Output B: S/PDIF mode on bus i2s_b, same 48 kHz / 16-bit / stereo format as output A
  - platform: i2s_audio
    id: speaker_b_id
    i2s_audio_id: i2s_b
    dac_type: external
    i2s_dout_pin: ${b_dout_pin}
    spdif_mode: true
    use_apll: true
    sample_rate: 48000
    bits_per_sample: 16bit
    channel: stereo
    i2s_mode: primary
  # Router under test: the declared format matches both outputs above
  - platform: router
    id: router_id
    output_speakers:
      - speaker_a_id
      - speaker_b_id
    sample_rate: 48000
    bits_per_sample: 16
    num_channels: 2

View File

@@ -0,0 +1,7 @@
# Pin values for the ${...} placeholders; presumably consumed by the included common.yaml
substitutions:
  a_lrclk_pin: GPIO4
  a_bclk_pin: GPIO5
  a_dout_pin: GPIO14
  b_dout_pin: GPIO19

<<: !include common.yaml