From a8b0133ec115984ed05b8129a6e85dbc56e93166 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt
Date: Thu, 30 Apr 2026 08:49:28 -0400
Subject: [PATCH] [audio] Enable specific codecs and configure advanced features (#16166)

---
# Reviewer notes (free text after the `---` separator; not part of the commit):
#
# * Adds an optional `codecs:` mapping to the `audio:` config schema with
#   optional `flac`, `mp3`, `opus` and `wav` keys. `_maybe_empty_codec` turns
#   a bare key (None value) into an empty dict before schema validation.
# * In `to_code`, the presence of a codec key sets the matching `*_support`
#   flag on `AudioData`; option values that are present are copied into the
#   new `FlacOptions` / `Mp3Options` / `OpusOptions` dataclasses.
# * `_emit_memory_pair` writes a PSRAM/internal pair of sdkconfig booleans for
#   "psram" or "internal" and writes nothing for any other value (e.g. None).
# * `wav_support` defaults to True; `CONFIG_MICRO_DECODER_CODEC_WAV` is set to
#   False only when it is False.
# * `CONFIG_SCHEMA` now also applies `cv.only_on_esp32`.
# * `pseudostack.size` is validated with `cv.int_range(60000, 240000)`.
#
# NOTE(review): this copy of the patch had its line breaks and indentation
# collapsed; the layout below was reconstructed from the hunk headers and
# Python syntax. The indent of the closing `)` context line in the -208 hunk,
# the nesting of the CONFIG_MICRO_DECODER_CODEC_* lines under
# `if data.micro_decoder_support:` in the -229 hunk, and the exact position
# of blank context lines are assumptions -- confirm against upstream before
# applying.
#
 esphome/components/audio/__init__.py       | 196 ++++++++++++++++++++-
 tests/components/audio/common.yaml         |  14 ++
 tests/components/audio/test.esp32-idf.yaml |   1 +
 3 files changed, 207 insertions(+), 4 deletions(-)
 create mode 100644 tests/components/audio/common.yaml
 create mode 100644 tests/components/audio/test.esp32-idf.yaml

diff --git a/esphome/components/audio/__init__.py b/esphome/components/audio/__init__.py
index fe111be31e..db8f69e6a5 100644
--- a/esphome/components/audio/__init__.py
+++ b/esphome/components/audio/__init__.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 import esphome.codegen as cg
 from esphome.components.esp32 import (
@@ -7,7 +7,12 @@ from esphome.components.esp32 import (
     include_builtin_idf_component,
 )
 import esphome.config_validation as cv
-from esphome.const import CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE
+from esphome.const import (
+    CONF_BITS_PER_SAMPLE,
+    CONF_NUM_CHANNELS,
+    CONF_SAMPLE_RATE,
+    CONF_SIZE,
+)
 from esphome.core import CORE
 import esphome.final_validate as fv
 
@@ -25,13 +30,46 @@ AUDIO_FILE_TYPE_ENUM = {
     "OPUS": AudioFileType.OPUS,
 }
 
+MEMORY_PSRAM = "psram"
+MEMORY_INTERNAL = "internal"
+MEMORY_LOCATIONS = [MEMORY_PSRAM, MEMORY_INTERNAL]
+
+
+@dataclass
+class FlacOptions:
+    buffer_memory: str | None = None
+
+
+@dataclass
+class Mp3Options:
+    buffer_memory: str | None = None
+
+
+@dataclass
+class OpusPseudostackOptions:
+    threadsafe: bool | None = None
+    buffer_memory: str | None = None
+    size: int | None = None
+
+
+@dataclass
+class OpusOptions:
+    floating_point: bool | None = None
+    state_memory: str | None = None
+    pseudostack: OpusPseudostackOptions = field(default_factory=OpusPseudostackOptions)
+
 
 @dataclass
 class AudioData:
     flac_support: bool = False
     mp3_support: bool = False
     opus_support: bool = False
+    # WAV defaults to True for backward compatibility; will become opt-in in a future release
+    wav_support: bool = True
     micro_decoder_support: bool = False
+    flac: FlacOptions = field(default_factory=FlacOptions)
+    mp3: Mp3Options = field(default_factory=Mp3Options)
+    opus: OpusOptions = field(default_factory=OpusOptions)
 
 
 def _get_data() -> AudioData:
@@ -55,6 +93,11 @@ def request_opus_support() -> None:
     _get_data().opus_support = True
 
 
+def request_wav_support() -> None:
+    """Request WAV codec support for audio decoding."""
+    _get_data().wav_support = True
+
+
 def request_micro_decoder_support() -> None:
     """Request micro-decoder library support for audio decoding."""
     _get_data().micro_decoder_support = True
@@ -67,9 +110,78 @@ CONF_MAX_CHANNELS = "max_channels"
 CONF_MIN_SAMPLE_RATE = "min_sample_rate"
 CONF_MAX_SAMPLE_RATE = "max_sample_rate"
 
+CONF_CODECS = "codecs"
+CONF_WAV = "wav"
+CONF_FLAC = "flac"
+CONF_MP3 = "mp3"
+CONF_OPUS = "opus"
+CONF_BUFFER_MEMORY = "buffer_memory"
+CONF_FLOATING_POINT = "floating_point"
+CONF_STATE_MEMORY = "state_memory"
+CONF_PSEUDOSTACK = "pseudostack"
+CONF_THREADSAFE = "threadsafe"
+
+
+_MEMORY_LOCATION_VALIDATOR = cv.one_of(*MEMORY_LOCATIONS, lower=True)
+
+
+def _maybe_empty_codec(schema):
+    """Wrap a codec dict schema so that a bare key (None value) is treated as an empty dict."""
+
+    def validator(value):
+        if value is None:
+            value = {}
+        return schema(value)
+
+    return validator
+
+
+CODEC_FLAC_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+    }
+)
+
+CODEC_MP3_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+    }
+)
+
+OPUS_PSEUDOSTACK_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_THREADSAFE): cv.boolean,
+        cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+        cv.Optional(CONF_SIZE): cv.int_range(60000, 240000),
+    }
+)
+
+CODEC_OPUS_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_FLOATING_POINT): cv.boolean,
+        cv.Optional(CONF_STATE_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+        cv.Optional(CONF_PSEUDOSTACK): _maybe_empty_codec(OPUS_PSEUDOSTACK_SCHEMA),
+    }
+)
+
+CODEC_WAV_SCHEMA = cv.Schema({})
+
+CODECS_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_FLAC): _maybe_empty_codec(CODEC_FLAC_SCHEMA),
+        cv.Optional(CONF_MP3): _maybe_empty_codec(CODEC_MP3_SCHEMA),
+        cv.Optional(CONF_OPUS): _maybe_empty_codec(CODEC_OPUS_SCHEMA),
+        cv.Optional(CONF_WAV): _maybe_empty_codec(CODEC_WAV_SCHEMA),
+    }
+)
 
 CONFIG_SCHEMA = cv.All(
-    cv.Schema({}),
+    cv.Schema(
+        {
+            cv.Optional(CONF_CODECS): _maybe_empty_codec(CODECS_SCHEMA),
+        }
+    ),
+    cv.only_on_esp32,
 )
 
 AUDIO_COMPONENT_SCHEMA = cv.Schema(
@@ -208,6 +320,15 @@ def final_validate_audio_schema(
     )
 
 
+def _emit_memory_pair(value: str | None, psram_key: str, internal_key: str) -> None:
+    if value == MEMORY_PSRAM:
+        add_idf_sdkconfig_option(psram_key, True)
+        add_idf_sdkconfig_option(internal_key, False)
+    elif value == MEMORY_INTERNAL:
+        add_idf_sdkconfig_option(psram_key, False)
+        add_idf_sdkconfig_option(internal_key, True)
+
+
 async def to_code(config):
     # Re-enable ESP-IDF's HTTP client (excluded by default to save compile time)
     include_builtin_idf_component("esp_http_client")
@@ -219,6 +340,36 @@ async def to_code(config):
 
     data = _get_data()
 
+    # Merge user-supplied codec configuration (additive: presence enables the codec)
+    if codecs_config := config.get(CONF_CODECS):
+        if (flac_config := codecs_config.get(CONF_FLAC)) is not None:
+            data.flac_support = True
+            if (buffer_memory := flac_config.get(CONF_BUFFER_MEMORY)) is not None:
+                data.flac.buffer_memory = buffer_memory
+        if (mp3_config := codecs_config.get(CONF_MP3)) is not None:
+            data.mp3_support = True
+            if (buffer_memory := mp3_config.get(CONF_BUFFER_MEMORY)) is not None:
+                data.mp3.buffer_memory = buffer_memory
+        if (opus_config := codecs_config.get(CONF_OPUS)) is not None:
+            data.opus_support = True
+            floating_point = opus_config.get(CONF_FLOATING_POINT)
+            if floating_point is not None:
+                data.opus.floating_point = floating_point
+            if (state_memory := opus_config.get(CONF_STATE_MEMORY)) is not None:
+                data.opus.state_memory = state_memory
+            if (pseudostack_config := opus_config.get(CONF_PSEUDOSTACK)) is not None:
+                threadsafe = pseudostack_config.get(CONF_THREADSAFE)
+                if threadsafe is not None:
+                    data.opus.pseudostack.threadsafe = threadsafe
+                if (
+                    buffer_memory := pseudostack_config.get(CONF_BUFFER_MEMORY)
+                ) is not None:
+                    data.opus.pseudostack.buffer_memory = buffer_memory
+                if (size := pseudostack_config.get(CONF_SIZE)) is not None:
+                    data.opus.pseudostack.size = size
+        if CONF_WAV in codecs_config:
+            data.wav_support = True
+
     if data.micro_decoder_support:
         add_idf_component(name="esphome/micro-decoder", ref="0.2.0")
 
@@ -229,13 +380,50 @@ async def to_code(config):
             add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_MP3", False)
         if not data.opus_support:
             add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_OPUS", False)
+        if not data.wav_support:
+            add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_WAV", False)
 
-    # Legacy audio_decoder.cpp support defines and components
+    # Configure each codec library.
+    # Adds a define and IDF component for legacy `audio_decoder.cpp`.
     if data.flac_support:
         cg.add_define("USE_AUDIO_FLAC_SUPPORT")
         add_idf_component(name="esphome/micro-flac", ref="0.1.1")
+        _emit_memory_pair(
+            data.flac.buffer_memory,
+            "CONFIG_MICRO_FLAC_PREFER_PSRAM",
+            "CONFIG_MICRO_FLAC_PREFER_INTERNAL",
+        )
     if data.mp3_support:
         cg.add_define("USE_AUDIO_MP3_SUPPORT")
+        _emit_memory_pair(
+            data.mp3.buffer_memory,
+            "CONFIG_MP3_DECODER_PREFER_PSRAM",
+            "CONFIG_MP3_DECODER_PREFER_INTERNAL",
+        )
     if data.opus_support:
         cg.add_define("USE_AUDIO_OPUS_SUPPORT")
         add_idf_component(name="esphome/micro-opus", ref="0.3.6")
+        if data.opus.floating_point is not None:
+            add_idf_sdkconfig_option(
+                "CONFIG_OPUS_FLOATING_POINT", data.opus.floating_point
+            )
+        _emit_memory_pair(
+            data.opus.state_memory,
+            "CONFIG_OPUS_STATE_PREFER_PSRAM",
+            "CONFIG_OPUS_STATE_PREFER_INTERNAL",
+        )
+        if data.opus.pseudostack.threadsafe is True:
+            add_idf_sdkconfig_option("CONFIG_OPUS_THREADSAFE_PSEUDOSTACK", True)
+            add_idf_sdkconfig_option("CONFIG_OPUS_NONTHREADSAFE_PSEUDOSTACK", False)
+        elif data.opus.pseudostack.threadsafe is False:
+            add_idf_sdkconfig_option("CONFIG_OPUS_THREADSAFE_PSEUDOSTACK", False)
+            add_idf_sdkconfig_option("CONFIG_OPUS_NONTHREADSAFE_PSEUDOSTACK", True)
+        _emit_memory_pair(
+            data.opus.pseudostack.buffer_memory,
+            "CONFIG_OPUS_PSEUDOSTACK_PREFER_PSRAM",
+            "CONFIG_OPUS_PSEUDOSTACK_PREFER_INTERNAL",
+        )
+        if data.opus.pseudostack.size is not None:
+            add_idf_sdkconfig_option(
+                "CONFIG_OPUS_PSEUDOSTACK_SIZE", data.opus.pseudostack.size
+            )
diff --git a/tests/components/audio/common.yaml b/tests/components/audio/common.yaml
new file mode 100644
index 0000000000..3cde9b8449
--- /dev/null
+++ b/tests/components/audio/common.yaml
@@ -0,0 +1,14 @@
+audio:
+  codecs:
+    flac:
+      buffer_memory: internal
+    mp3:
+      buffer_memory: psram
+    opus:
+      floating_point: false
+      state_memory: psram
+      pseudostack:
+        threadsafe: false
+        buffer_memory: internal
+        size: 80000
+    wav:
diff --git a/tests/components/audio/test.esp32-idf.yaml b/tests/components/audio/test.esp32-idf.yaml
new file mode 100644
index 0000000000..dade44d145
--- /dev/null
+++ b/tests/components/audio/test.esp32-idf.yaml
@@ -0,0 +1 @@
+<<: !include common.yaml