From a8b0133ec115984ed05b8129a6e85dbc56e93166 Mon Sep 17 00:00:00 2001
From: Kevin Ahrendt <kevin.ahrendt@openhomefoundation.org>
Date: Thu, 30 Apr 2026 08:49:28 -0400
Subject: [PATCH] [audio] Enable specific codecs and configure advanced
 features (#16166)

---
 esphome/components/audio/__init__.py       | 196 ++++++++++++++++++++-
 tests/components/audio/common.yaml         |  14 ++
 tests/components/audio/test.esp32-idf.yaml |   1 +
 3 files changed, 207 insertions(+), 4 deletions(-)
 create mode 100644 tests/components/audio/common.yaml
 create mode 100644 tests/components/audio/test.esp32-idf.yaml

diff --git a/esphome/components/audio/__init__.py b/esphome/components/audio/__init__.py
index fe111be31e..db8f69e6a5 100644
--- a/esphome/components/audio/__init__.py
+++ b/esphome/components/audio/__init__.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 
 import esphome.codegen as cg
 from esphome.components.esp32 import (
@@ -7,7 +7,12 @@ from esphome.components.esp32 import (
     include_builtin_idf_component,
 )
 import esphome.config_validation as cv
-from esphome.const import CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE
+from esphome.const import (
+    CONF_BITS_PER_SAMPLE,
+    CONF_NUM_CHANNELS,
+    CONF_SAMPLE_RATE,
+    CONF_SIZE,
+)
 from esphome.core import CORE
 import esphome.final_validate as fv
 
@@ -25,13 +30,46 @@ AUDIO_FILE_TYPE_ENUM = {
     "OPUS": AudioFileType.OPUS,
 }
 
+MEMORY_PSRAM = "psram"
+MEMORY_INTERNAL = "internal"
+MEMORY_LOCATIONS = [MEMORY_PSRAM, MEMORY_INTERNAL]  # allowed *_memory values
+
+
+@dataclass
+class FlacOptions:  # FLAC settings collected for to_code
+    buffer_memory: str | None = None  # "psram"/"internal"; None emits no sdkconfig option
+
+
+@dataclass
+class Mp3Options:  # MP3 settings collected for to_code
+    buffer_memory: str | None = None  # "psram"/"internal"; None emits no sdkconfig option
+
+
+@dataclass
+class OpusPseudostackOptions:  # each None field emits no sdkconfig option
+    threadsafe: bool | None = None  # picks CONFIG_OPUS_(NON)THREADSAFE_PSEUDOSTACK
+    buffer_memory: str | None = None  # "psram" or "internal"
+    size: int | None = None  # CONFIG_OPUS_PSEUDOSTACK_SIZE; schema allows 60000-240000
+
+
+@dataclass
+class OpusOptions:  # each None field emits no sdkconfig option
+    floating_point: bool | None = None  # value for CONFIG_OPUS_FLOATING_POINT
+    state_memory: str | None = None  # "psram" or "internal"
+    pseudostack: OpusPseudostackOptions = field(default_factory=OpusPseudostackOptions)
+
 
 @dataclass
 class AudioData:
     flac_support: bool = False
     mp3_support: bool = False
     opus_support: bool = False
+    # WAV defaults to True for backward compatibility; will become opt-in in a future release
+    wav_support: bool = True
     micro_decoder_support: bool = False
+    flac: FlacOptions = field(default_factory=FlacOptions)  # applied only if flac_support
+    mp3: Mp3Options = field(default_factory=Mp3Options)  # applied only if mp3_support
+    opus: OpusOptions = field(default_factory=OpusOptions)  # applied only if opus_support
 
 
 def _get_data() -> AudioData:
@@ -55,6 +93,11 @@ def request_opus_support() -> None:
     _get_data().opus_support = True
 
 
+def request_wav_support() -> None:
+    """Request WAV codec support for audio decoding (already enabled by default for now)."""
+    _get_data().wav_support = True
+
+
 def request_micro_decoder_support() -> None:
     """Request micro-decoder library support for audio decoding."""
     _get_data().micro_decoder_support = True
@@ -67,9 +110,78 @@ CONF_MAX_CHANNELS = "max_channels"
 CONF_MIN_SAMPLE_RATE = "min_sample_rate"
 CONF_MAX_SAMPLE_RATE = "max_sample_rate"
 
+CONF_CODECS = "codecs"  # top-level option of the audio component
+CONF_WAV = "wav"
+CONF_FLAC = "flac"
+CONF_MP3 = "mp3"
+CONF_OPUS = "opus"
+CONF_BUFFER_MEMORY = "buffer_memory"
+CONF_FLOATING_POINT = "floating_point"  # used by the Opus schema
+CONF_STATE_MEMORY = "state_memory"  # used by the Opus schema
+CONF_PSEUDOSTACK = "pseudostack"  # nested mapping inside the Opus schema
+CONF_THREADSAFE = "threadsafe"  # pseudostack sub-option
+
+
+_MEMORY_LOCATION_VALIDATOR = cv.one_of(*MEMORY_LOCATIONS, lower=True)  # lowercases input
+
+
+def _maybe_empty_codec(schema):
+    """Return a validator that applies ``schema``, treating a bare key (None) as ``{}``."""
+
+    def validator(value):
+        # A YAML key written without a value (e.g. ``wav:``) arrives here as None.
+        config = {} if value is None else value
+        return schema(config)
+
+    return validator
+
+
+CODEC_FLAC_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+    }
+)
+
+CODEC_MP3_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+    }
+)
+
+OPUS_PSEUDOSTACK_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_THREADSAFE): cv.boolean,
+        cv.Optional(CONF_BUFFER_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+        cv.Optional(CONF_SIZE): cv.int_range(60000, 240000),  # presumably bytes — TODO confirm
+    }
+)
+
+CODEC_OPUS_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_FLOATING_POINT): cv.boolean,
+        cv.Optional(CONF_STATE_MEMORY): _MEMORY_LOCATION_VALIDATOR,
+        cv.Optional(CONF_PSEUDOSTACK): _maybe_empty_codec(OPUS_PSEUDOSTACK_SCHEMA),
+    }
+)
+
+CODEC_WAV_SCHEMA = cv.Schema({})  # WAV takes no options; the key's presence enables it
+
+CODECS_SCHEMA = cv.Schema(
+    {
+        cv.Optional(CONF_FLAC): _maybe_empty_codec(CODEC_FLAC_SCHEMA),
+        cv.Optional(CONF_MP3): _maybe_empty_codec(CODEC_MP3_SCHEMA),
+        cv.Optional(CONF_OPUS): _maybe_empty_codec(CODEC_OPUS_SCHEMA),
+        cv.Optional(CONF_WAV): _maybe_empty_codec(CODEC_WAV_SCHEMA),
+    }
+)
 
 CONFIG_SCHEMA = cv.All(
-    cv.Schema({}),
+    cv.Schema(
+        {
+            cv.Optional(CONF_CODECS): _maybe_empty_codec(CODECS_SCHEMA),  # bare key accepted
+        }
+    ),
+    cv.only_on_esp32,  # to_code emits ESP-IDF sdkconfig options and IDF components
 )
 
 AUDIO_COMPONENT_SCHEMA = cv.Schema(
@@ -208,6 +320,15 @@ def final_validate_audio_schema(
     )
 
 
+def _emit_memory_pair(value: str | None, psram_key: str, internal_key: str) -> None:
+    """Set the two complementary sdkconfig bools selected by ``value``; no-op if unset."""
+    if value not in (MEMORY_PSRAM, MEMORY_INTERNAL):
+        return
+    prefer_psram = value == MEMORY_PSRAM
+    add_idf_sdkconfig_option(psram_key, prefer_psram)
+    add_idf_sdkconfig_option(internal_key, not prefer_psram)
+
+
 async def to_code(config):
     # Re-enable ESP-IDF's HTTP client (excluded by default to save compile time)
     include_builtin_idf_component("esp_http_client")
@@ -219,6 +340,36 @@ async def to_code(config):
 
     data = _get_data()
 
+    # Merge user-supplied codec configuration (additive: presence enables the codec)
+    if codecs_config := config.get(CONF_CODECS):
+        if (flac_config := codecs_config.get(CONF_FLAC)) is not None:
+            data.flac_support = True
+            if (buffer_memory := flac_config.get(CONF_BUFFER_MEMORY)) is not None:
+                data.flac.buffer_memory = buffer_memory
+        if (mp3_config := codecs_config.get(CONF_MP3)) is not None:
+            data.mp3_support = True
+            if (buffer_memory := mp3_config.get(CONF_BUFFER_MEMORY)) is not None:
+                data.mp3.buffer_memory = buffer_memory
+        if (opus_config := codecs_config.get(CONF_OPUS)) is not None:
+            data.opus_support = True
+            floating_point = opus_config.get(CONF_FLOATING_POINT)
+            if floating_point is not None:
+                data.opus.floating_point = floating_point
+            if (state_memory := opus_config.get(CONF_STATE_MEMORY)) is not None:
+                data.opus.state_memory = state_memory
+            if (pseudostack_config := opus_config.get(CONF_PSEUDOSTACK)) is not None:
+                threadsafe = pseudostack_config.get(CONF_THREADSAFE)
+                if threadsafe is not None:
+                    data.opus.pseudostack.threadsafe = threadsafe
+                if (
+                    buffer_memory := pseudostack_config.get(CONF_BUFFER_MEMORY)
+                ) is not None:
+                    data.opus.pseudostack.buffer_memory = buffer_memory
+                if (size := pseudostack_config.get(CONF_SIZE)) is not None:
+                    data.opus.pseudostack.size = size
+        if CONF_WAV in codecs_config:
+            data.wav_support = True
+
     if data.micro_decoder_support:
         add_idf_component(name="esphome/micro-decoder", ref="0.2.0")
 
@@ -229,13 +380,50 @@ async def to_code(config):
             add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_MP3", False)
         if not data.opus_support:
             add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_OPUS", False)
+        if not data.wav_support:
+            add_idf_sdkconfig_option("CONFIG_MICRO_DECODER_CODEC_WAV", False)
 
-    # Legacy audio_decoder.cpp support defines and components
+    # Configure each codec library.
+    # Adds a define and IDF component for legacy `audio_decoder.cpp`.
     if data.flac_support:
         cg.add_define("USE_AUDIO_FLAC_SUPPORT")
         add_idf_component(name="esphome/micro-flac", ref="0.1.1")
+        _emit_memory_pair(
+            data.flac.buffer_memory,
+            "CONFIG_MICRO_FLAC_PREFER_PSRAM",
+            "CONFIG_MICRO_FLAC_PREFER_INTERNAL",
+        )
     if data.mp3_support:
         cg.add_define("USE_AUDIO_MP3_SUPPORT")
+        _emit_memory_pair(
+            data.mp3.buffer_memory,
+            "CONFIG_MP3_DECODER_PREFER_PSRAM",
+            "CONFIG_MP3_DECODER_PREFER_INTERNAL",
+        )
     if data.opus_support:
         cg.add_define("USE_AUDIO_OPUS_SUPPORT")
         add_idf_component(name="esphome/micro-opus", ref="0.3.6")
+        if data.opus.floating_point is not None:
+            add_idf_sdkconfig_option(
+                "CONFIG_OPUS_FLOATING_POINT", data.opus.floating_point
+            )
+        _emit_memory_pair(
+            data.opus.state_memory,
+            "CONFIG_OPUS_STATE_PREFER_PSRAM",
+            "CONFIG_OPUS_STATE_PREFER_INTERNAL",
+        )
+        if data.opus.pseudostack.threadsafe is True:
+            add_idf_sdkconfig_option("CONFIG_OPUS_THREADSAFE_PSEUDOSTACK", True)
+            add_idf_sdkconfig_option("CONFIG_OPUS_NONTHREADSAFE_PSEUDOSTACK", False)
+        elif data.opus.pseudostack.threadsafe is False:
+            add_idf_sdkconfig_option("CONFIG_OPUS_THREADSAFE_PSEUDOSTACK", False)
+            add_idf_sdkconfig_option("CONFIG_OPUS_NONTHREADSAFE_PSEUDOSTACK", True)
+        _emit_memory_pair(
+            data.opus.pseudostack.buffer_memory,
+            "CONFIG_OPUS_PSEUDOSTACK_PREFER_PSRAM",
+            "CONFIG_OPUS_PSEUDOSTACK_PREFER_INTERNAL",
+        )
+        if data.opus.pseudostack.size is not None:
+            add_idf_sdkconfig_option(
+                "CONFIG_OPUS_PSEUDOSTACK_SIZE", data.opus.pseudostack.size
+            )
diff --git a/tests/components/audio/common.yaml b/tests/components/audio/common.yaml
new file mode 100644
index 0000000000..3cde9b8449
--- /dev/null
+++ b/tests/components/audio/common.yaml
@@ -0,0 +1,14 @@
+audio:
+  codecs:
+    flac:
+      buffer_memory: internal
+    mp3:
+      buffer_memory: psram
+    opus:
+      floating_point: false
+      state_memory: psram
+      pseudostack:
+        threadsafe: false
+        buffer_memory: internal
+        size: 80000  # within the validated 60000-240000 range
+    wav:  # bare key: enables WAV, which has no options
diff --git a/tests/components/audio/test.esp32-idf.yaml b/tests/components/audio/test.esp32-idf.yaml
new file mode 100644
index 0000000000..dade44d145
--- /dev/null
+++ b/tests/components/audio/test.esp32-idf.yaml
@@ -0,0 +1 @@
+<<: !include common.yaml