From d759f1a56751207689f5db024181739c96d94646 Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Thu, 23 Apr 2026 16:53:52 -0400 Subject: [PATCH] [audio_http] Add a media source for playing audio from HTTP URLs (#15741) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- CODEOWNERS | 1 + esphome/components/audio_http/__init__.py | 0 .../audio_http/audio_http_media_source.cpp | 163 ++++++++++++++++++ .../audio_http/audio_http_media_source.h | 59 +++++++ esphome/components/audio_http/media_source.py | 59 +++++++ tests/components/audio_http/common.yaml | 7 + .../components/audio_http/test.esp32-idf.yaml | 1 + 7 files changed, 290 insertions(+) create mode 100644 esphome/components/audio_http/__init__.py create mode 100644 esphome/components/audio_http/audio_http_media_source.cpp create mode 100644 esphome/components/audio_http/audio_http_media_source.h create mode 100644 esphome/components/audio_http/media_source.py create mode 100644 tests/components/audio_http/common.yaml create mode 100644 tests/components/audio_http/test.esp32-idf.yaml diff --git a/CODEOWNERS b/CODEOWNERS index 69f2cb1d17..be835aae3d 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -56,6 +56,7 @@ esphome/components/audio_adc/* @kbx81 esphome/components/audio_dac/* @kbx81 esphome/components/audio_file/* @kahrendt esphome/components/audio_file/media_source/* @kahrendt +esphome/components/audio_http/* @kahrendt esphome/components/axs15231/* @clydebarrow esphome/components/b_parasite/* @rbaron esphome/components/ballu/* @bazuchan diff --git a/esphome/components/audio_http/__init__.py b/esphome/components/audio_http/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/esphome/components/audio_http/audio_http_media_source.cpp b/esphome/components/audio_http/audio_http_media_source.cpp new file mode 100644 index 0000000000..04b7d046e6 --- /dev/null +++ b/esphome/components/audio_http/audio_http_media_source.cpp @@ -0,0 +1,163 @@ +#include "audio_http_media_source.h" + +#ifdef USE_ESP32 + +#include "esphome/core/log.h" + +#include +#include + +#include + +namespace esphome::audio_http { + +static const char *const TAG = "audio_http_media_source"; + +// Decoder task / buffer tuning. Kept here as constants so the header stays free of magic numbers. +static constexpr size_t DEFAULT_TRANSFER_BUFFER_SIZE = 8 * 1024; // Staging buffer between HTTP reader and decoder +static constexpr uint32_t HTTP_TIMEOUT_MS = 5000; // HTTP connect/read timeout +static constexpr uint32_t AUDIO_WRITE_TIMEOUT_MS = 50; // Max blocking time per on_audio_write() call +static constexpr uint32_t READER_WRITE_TIMEOUT_MS = 50; // Max blocking time when writing into the ring buffer +static constexpr uint8_t READER_TASK_PRIORITY = 2; +static constexpr uint8_t DECODER_TASK_PRIORITY = 2; +static constexpr size_t READER_TASK_STACK_SIZE = 4096; +static constexpr size_t DECODER_TASK_STACK_SIZE = 5120; +static constexpr uint32_t PAUSE_POLL_DELAY_MS = 20; +static constexpr const char *const HTTP_URI_PREFIX = "http://"; +static constexpr const char *const HTTPS_URI_PREFIX = "https://"; + +void AudioHTTPMediaSource::dump_config() { + ESP_LOGCONFIG(TAG, + "Audio HTTP Media Source:\n" + " Buffer Size: %zu bytes\n" + " Decoder Task Stack in PSRAM: %s", + this->buffer_size_, YESNO(this->decoder_task_stack_in_psram_)); +} + +void AudioHTTPMediaSource::setup() { + this->disable_loop(); + + micro_decoder::DecoderConfig config; + config.ring_buffer_size = this->buffer_size_; + // Keep the transfer buffer smaller than the ring buffer so the reader can top up the ring + // while the decoder is still draining it, instead of oscillating between empty and full. + config.transfer_buffer_size = std::min(DEFAULT_TRANSFER_BUFFER_SIZE, this->buffer_size_ / 2); + config.http_timeout_ms = HTTP_TIMEOUT_MS; + config.audio_write_timeout_ms = AUDIO_WRITE_TIMEOUT_MS; + config.reader_write_timeout_ms = READER_WRITE_TIMEOUT_MS; + config.reader_priority = READER_TASK_PRIORITY; + config.decoder_priority = DECODER_TASK_PRIORITY; + config.reader_stack_size = READER_TASK_STACK_SIZE; + config.decoder_stack_size = DECODER_TASK_STACK_SIZE; + config.decoder_stack_in_psram = this->decoder_task_stack_in_psram_; + + this->decoder_ = std::make_unique(config); + if (this->decoder_ == nullptr) { + ESP_LOGE(TAG, "Failed to allocate decoder"); + this->mark_failed(); + return; + } + this->decoder_->set_listener(this); // We inherit from micro_decoder::DecoderListener +} + +void AudioHTTPMediaSource::loop() { this->decoder_->loop(); } + +bool AudioHTTPMediaSource::can_handle(const std::string &uri) const { + return uri.starts_with(HTTP_URI_PREFIX) || uri.starts_with(HTTPS_URI_PREFIX); +} + +// Called from the orchestrator's main loop, so no synchronization needed with loop() +bool AudioHTTPMediaSource::play_uri(const std::string &uri) { + if (!this->is_ready() || this->is_failed() || this->status_has_error() || !this->has_listener()) { + return false; + } + + // Check if source is already playing + if (this->get_state() != media_source::MediaSourceState::IDLE) { + ESP_LOGE(TAG, "Cannot play '%s': source is busy", uri.c_str()); + return false; + } + + // Validate URI starts with "http://" or "https://" + if (!uri.starts_with(HTTP_URI_PREFIX) && !uri.starts_with(HTTPS_URI_PREFIX)) { + ESP_LOGE(TAG, "Invalid URI: '%s'", uri.c_str()); + return false; + } + + if (this->decoder_->play_url(uri)) { + this->pause_.store(false, std::memory_order_relaxed); + this->enable_loop(); + return true; + } + + ESP_LOGE(TAG, "Failed to start playback of '%s'", uri.c_str()); + return false; +} + +// Called from the orchestrator's main loop, so no synchronization needed with loop() +void AudioHTTPMediaSource::handle_command(media_source::MediaSourceCommand command) { + switch (command) { + case media_source::MediaSourceCommand::STOP: + this->decoder_->stop(); + break; + case media_source::MediaSourceCommand::PAUSE: + // Only valid while actively playing; ignoring from IDLE/ERROR/PAUSED prevents the state + // machine from getting stuck in PAUSED when no playback is active (which would block the + // next play_uri() call via its IDLE-state precondition). + if (this->get_state() != media_source::MediaSourceState::PLAYING) + break; + // PAUSE does not stop the decoder task. Instead, on_audio_write() returns 0 and temporarily + // yields, which fills the ring buffer and applies back pressure that effectively pauses both + // the decoder and HTTP reader tasks. + this->set_state_(media_source::MediaSourceState::PAUSED); + this->pause_.store(true, std::memory_order_relaxed); + break; + case media_source::MediaSourceCommand::PLAY: + // Only resume from PAUSED; don't fabricate a PLAYING state from IDLE/ERROR. + if (this->get_state() != media_source::MediaSourceState::PAUSED) + break; + this->set_state_(media_source::MediaSourceState::PLAYING); + this->pause_.store(false, std::memory_order_relaxed); + break; + default: + break; + } +} + +// Called from the decoder task. Forwards to the orchestrator's listener, which is responsible for +// being thread-safe with respect to its own audio writer. +size_t AudioHTTPMediaSource::on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) { + if (this->pause_.load(std::memory_order_relaxed)) { + vTaskDelay(pdMS_TO_TICKS(PAUSE_POLL_DELAY_MS)); + return 0; + } + return this->write_output(data, length, timeout_ms, this->stream_info_); +} + +// Called from the decoder task before the first on_audio_write(). +void AudioHTTPMediaSource::on_stream_info(const micro_decoder::AudioStreamInfo &info) { + this->stream_info_ = audio::AudioStreamInfo(info.get_bits_per_sample(), info.get_channels(), info.get_sample_rate()); +} + +// microDecoder invokes on_state_change() from inside decoder_->loop(), so this runs on the main +// loop thread and it's safe to call set_state_() directly. +void AudioHTTPMediaSource::on_state_change(micro_decoder::DecoderState state) { + switch (state) { + case micro_decoder::DecoderState::IDLE: + this->set_state_(media_source::MediaSourceState::IDLE); + this->disable_loop(); + break; + case micro_decoder::DecoderState::PLAYING: + this->set_state_(media_source::MediaSourceState::PLAYING); + break; + case micro_decoder::DecoderState::FAILED: + this->set_state_(media_source::MediaSourceState::ERROR); + break; + default: + break; + } +} + +} // namespace esphome::audio_http + +#endif // USE_ESP32 diff --git a/esphome/components/audio_http/audio_http_media_source.h b/esphome/components/audio_http/audio_http_media_source.h new file mode 100644 index 0000000000..e4bd69e9e6 --- /dev/null +++ b/esphome/components/audio_http/audio_http_media_source.h @@ -0,0 +1,59 @@ +#pragma once + +#include "esphome/core/defines.h" + +#ifdef USE_ESP32 + +#include "esphome/components/audio/audio.h" +#include "esphome/components/media_source/media_source.h" +#include "esphome/core/component.h" + +#include +#include + +#include +#include +#include + +namespace esphome::audio_http { + +// Inherits from two unrelated listener-style interfaces: +// - media_source::MediaSource: this source reports state and writes audio *to* an orchestrator +// (the orchestrator calls set_listener() on us with a MediaSourceListener*). +// - micro_decoder::DecoderListener: the underlying decoder calls back *into* us with decoded +// audio and state changes (we call decoder_->set_listener(this) in setup()). +// The two set_listener() methods live on different base classes and serve opposite directions. +class AudioHTTPMediaSource : public Component, public media_source::MediaSource, public micro_decoder::DecoderListener { + public: + void setup() override; + void loop() override; + void dump_config() override; + + void set_buffer_size(size_t buffer_size) { this->buffer_size_ = buffer_size; } + void set_task_stack_in_psram(bool task_stack_in_psram) { this->decoder_task_stack_in_psram_ = task_stack_in_psram; } + + // MediaSource interface implementation + bool play_uri(const std::string &uri) override; + void handle_command(media_source::MediaSourceCommand command) override; + bool can_handle(const std::string &uri) const override; + + // DecoderListener interface implementation + size_t on_audio_write(const uint8_t *data, size_t length, uint32_t timeout_ms) override; + void on_stream_info(const micro_decoder::AudioStreamInfo &info) override; + void on_state_change(micro_decoder::DecoderState state) override; + + protected: + std::unique_ptr decoder_; + audio::AudioStreamInfo stream_info_; + + size_t buffer_size_{50000}; + + // Written from the main loop in handle_command(), read from the decoder task in + // on_audio_write(). Must be atomic to avoid a data race. + std::atomic pause_{false}; + bool decoder_task_stack_in_psram_{false}; +}; + +} // namespace esphome::audio_http + +#endif // USE_ESP32 diff --git a/esphome/components/audio_http/media_source.py b/esphome/components/audio_http/media_source.py new file mode 100644 index 0000000000..519d8df698 --- /dev/null +++ b/esphome/components/audio_http/media_source.py @@ -0,0 +1,59 @@ +from typing import Any + +import esphome.codegen as cg +from esphome.components import audio, esp32, media_source, psram +import esphome.config_validation as cv +from esphome.const import CONF_BUFFER_SIZE, CONF_ID, CONF_TASK_STACK_IN_PSRAM +from esphome.types import ConfigType + +CODEOWNERS = ["@kahrendt"] +AUTO_LOAD = ["audio"] + +audio_http_ns = cg.esphome_ns.namespace("audio_http") +AudioHTTPMediaSource = audio_http_ns.class_( + "AudioHTTPMediaSource", cg.Component, media_source.MediaSource +) + + +def _request_micro_decoder(config: ConfigType) -> ConfigType: + audio.request_micro_decoder_support() + return config + + +def _validate_task_stack_in_psram(value: Any) -> bool: + # Only require the psram component when actually enabling PSRAM stacks; validating + # the boolean first means `false` doesn't trigger the requires_component check. + if value := cv.boolean(value): + return cv.requires_component(psram.DOMAIN)(value) + return value + + +CONFIG_SCHEMA = cv.All( + media_source.media_source_schema( + AudioHTTPMediaSource, + ) + .extend( + { + cv.Optional(CONF_BUFFER_SIZE, default=50000): cv.int_range( + min=5000, max=1000000 + ), + cv.Optional(CONF_TASK_STACK_IN_PSRAM): _validate_task_stack_in_psram, + } + ) + .extend(cv.COMPONENT_SCHEMA), + cv.only_on_esp32, + _request_micro_decoder, +) + + +async def to_code(config: ConfigType) -> None: + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) + await media_source.register_media_source(var, config) + + if config.get(CONF_TASK_STACK_IN_PSRAM): + cg.add(var.set_task_stack_in_psram(True)) + esp32.add_idf_sdkconfig_option( + "CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True + ) + cg.add(var.set_buffer_size(config[CONF_BUFFER_SIZE])) diff --git a/tests/components/audio_http/common.yaml b/tests/components/audio_http/common.yaml new file mode 100644 index 0000000000..b7457165a5 --- /dev/null +++ b/tests/components/audio_http/common.yaml @@ -0,0 +1,7 @@ +psram: + +media_source: + - platform: audio_http + id: audio_http_source + buffer_size: 100000 + task_stack_in_psram: true diff --git a/tests/components/audio_http/test.esp32-idf.yaml b/tests/components/audio_http/test.esp32-idf.yaml new file mode 100644 index 0000000000..dade44d145 --- /dev/null +++ b/tests/components/audio_http/test.esp32-idf.yaml @@ -0,0 +1 @@ +<<: !include common.yaml