mirror of
https://github.com/esphome/esphome.git
synced 2026-06-24 12:53:26 +00:00
[i2s_audio] Fix speaker DMA buffer sizing and validate bit depth at compile time (#16672)
This commit is contained in:
@@ -170,7 +170,7 @@ def i2s_audio_component_schema(
|
||||
min=1
|
||||
),
|
||||
cv.Optional(CONF_BITS_PER_SAMPLE, default=default_bits_per_sample): cv.All(
|
||||
_validate_bits, cv.one_of(*I2S_BITS_PER_SAMPLE)
|
||||
_validate_bits, cv.int_, cv.one_of(*I2S_BITS_PER_SAMPLE)
|
||||
),
|
||||
cv.Optional(CONF_I2S_MODE, default=CONF_PRIMARY): cv.one_of(
|
||||
*I2S_MODE_OPTIONS, lower=True
|
||||
|
||||
@@ -98,11 +98,19 @@ def _set_stream_limits(config):
|
||||
min_sample_rate=config.get(CONF_SAMPLE_RATE),
|
||||
max_sample_rate=config.get(CONF_SAMPLE_RATE),
|
||||
)(config)
|
||||
elif config[CONF_I2S_MODE] == CONF_PRIMARY:
|
||||
# Primary mode has modifiable stream settings
|
||||
return config
|
||||
|
||||
# The original ESP32 cannot lay out sub-16-bit slots that match ESPHome's packed audio, so the smallest
|
||||
# stream it accepts is 16-bit (see start_i2s_driver); the other variants handle 8-bit.
|
||||
min_bits_per_sample = 16 if esp32.get_esp32_variant() == esp32.VARIANT_ESP32 else 8
|
||||
|
||||
if config[CONF_I2S_MODE] == CONF_PRIMARY:
|
||||
# Primary mode can reconfigure the bus to the incoming sample rate and channel count, but the
|
||||
# configured bits per sample is a hard ceiling: the speaker rejects any stream that exceeds the
|
||||
# slot bit width it was set up with (see start_i2s_driver), so advertise that as the maximum.
|
||||
audio.set_stream_limits(
|
||||
min_bits_per_sample=8,
|
||||
max_bits_per_sample=32,
|
||||
min_bits_per_sample=min_bits_per_sample,
|
||||
max_bits_per_sample=config[CONF_BITS_PER_SAMPLE],
|
||||
min_channels=1,
|
||||
max_channels=2,
|
||||
min_sample_rate=16000,
|
||||
@@ -111,13 +119,13 @@ def _set_stream_limits(config):
|
||||
else:
|
||||
# Secondary mode has unmodifiable max bits per sample and min/max sample rates
|
||||
audio.set_stream_limits(
|
||||
min_bits_per_sample=8,
|
||||
max_bits_per_sample=config.get(CONF_BITS_PER_SAMPLE),
|
||||
min_bits_per_sample=min_bits_per_sample,
|
||||
max_bits_per_sample=config[CONF_BITS_PER_SAMPLE],
|
||||
min_channels=1,
|
||||
max_channels=2,
|
||||
min_sample_rate=config.get(CONF_SAMPLE_RATE),
|
||||
max_sample_rate=config.get(CONF_SAMPLE_RATE),
|
||||
)
|
||||
)(config)
|
||||
|
||||
return config
|
||||
|
||||
@@ -134,12 +142,11 @@ def _validate_esp32_variant(config):
|
||||
if config[CONF_DAC_TYPE] == "internal":
|
||||
if variant not in INTERNAL_DAC_VARIANTS:
|
||||
raise cv.Invalid(f"{variant} does not have an internal DAC")
|
||||
elif (
|
||||
variant == esp32.VARIANT_ESP32
|
||||
and config.get(CONF_BITS_PER_SAMPLE) == 8
|
||||
and config.get(CONF_CHANNEL) in (CONF_MONO, CONF_LEFT, CONF_RIGHT)
|
||||
):
|
||||
raise cv.Invalid("8-bit mono mode is not supported on ESP32")
|
||||
elif variant == esp32.VARIANT_ESP32 and config[CONF_BITS_PER_SAMPLE] == 8:
|
||||
# The original ESP32 I2S peripheral packs each sample into a whole number of 16-bit words, so an
|
||||
# 8-bit slot does not line up with ESPHome's tightly packed audio (see start_i2s_driver). Reject it
|
||||
# at config time rather than emitting corrupted output at runtime.
|
||||
raise cv.Invalid("8-bit audio is not supported on the original ESP32")
|
||||
return config
|
||||
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#ifdef USE_ESP32
|
||||
|
||||
#include <driver/i2s_std.h>
|
||||
#include <hal/dma_types.h>
|
||||
|
||||
#include "esphome/components/audio/audio.h"
|
||||
#include "esphome/components/audio/audio_transfer_buffer.h"
|
||||
@@ -16,8 +17,16 @@ namespace esphome::i2s_audio {
|
||||
|
||||
static const char *const TAG = "i2s_audio.speaker.std";
|
||||
|
||||
static constexpr uint32_t DMA_BUFFER_DURATION_MS = 15;
|
||||
static constexpr size_t DMA_BUFFERS_COUNT = 4;
|
||||
static constexpr uint32_t DMA_BUFFER_DURATION_MS = 10;
|
||||
static constexpr size_t DMA_BUFFERS_COUNT = 5;
|
||||
// ESP-IDF clamps each DMA descriptor to this many bytes when allocating the channel (see i2s_get_buf_size in
|
||||
// the I2S driver). Mirror its target-dependent selection so the requested dma_frame_num stays in range; the
|
||||
// speaker task reads the size actually allocated back from the driver rather than relying on this value.
|
||||
#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
|
||||
static constexpr size_t I2S_DMA_BUFFER_MAX_SIZE = DMA_DESCRIPTOR_BUFFER_MAX_SIZE_64B_ALIGNED;
|
||||
#else
|
||||
static constexpr size_t I2S_DMA_BUFFER_MAX_SIZE = DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED;
|
||||
#endif
|
||||
// Sized to comfortably absorb scheduling jitter: at most DMA_BUFFERS_COUNT events can be in flight,
|
||||
// doubled so that a transient backlog never overruns the queue (which would desync the lockstep
|
||||
// invariant between i2s_event_queue_ and write_records_queue_).
|
||||
@@ -27,6 +36,17 @@ static constexpr size_t I2S_EVENT_QUEUE_COUNT = DMA_BUFFERS_COUNT * 2;
|
||||
// without masking real failures.
|
||||
static constexpr TickType_t WRITE_TIMEOUT_TICKS = pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * (DMA_BUFFERS_COUNT + 1));
|
||||
|
||||
// Requested frames per DMA buffer for the given stream, clamped so the byte size stays within the ESP-IDF
|
||||
// maximum DMA descriptor size. This is only the value handed to the channel config: ESP-IDF may still adjust
|
||||
// it (e.g. cache-line rounding on some targets), so the speaker task reads the size actually allocated back
|
||||
// from the driver instead of assuming this value. Clamping here keeps the request in range and avoids a
|
||||
// noisy ESP-IDF "dma frame num is out of dma buffer size" warning at high sample rates or bit depths.
|
||||
static uint32_t dma_buffer_frames(const audio::AudioStreamInfo &stream_info) {
|
||||
const uint32_t frames_from_duration = stream_info.ms_to_frames(DMA_BUFFER_DURATION_MS);
|
||||
const uint32_t max_frames = I2S_DMA_BUFFER_MAX_SIZE / stream_info.frames_to_bytes(1);
|
||||
return std::min(frames_from_duration, max_frames);
|
||||
}
|
||||
|
||||
void I2SAudioSpeaker::dump_config() {
|
||||
I2SAudioSpeakerBase::dump_config();
|
||||
const char *fmt_str;
|
||||
@@ -57,8 +77,21 @@ void I2SAudioSpeaker::run_speaker_task() {
|
||||
// avoids unnecessary single-frame splices.
|
||||
const size_t ring_buffer_size =
|
||||
(this->current_stream_info_.ms_to_bytes(ring_buffer_duration) / bytes_per_frame) * bytes_per_frame;
|
||||
const uint32_t frames_per_dma_buffer = this->current_stream_info_.ms_to_frames(DMA_BUFFER_DURATION_MS);
|
||||
const size_t dma_buffer_bytes = this->current_stream_info_.frames_to_bytes(frames_per_dma_buffer);
|
||||
// ESP-IDF may allocate smaller (or cache-line-rounded) DMA buffers than dma_buffer_frames() requested: it
|
||||
// clamps each descriptor to the max DMA descriptor size and, on targets that route internal memory through
|
||||
// the L1 cache (e.g. ESP32-P4), rounds the buffer to the cache line. Read the size the driver actually
|
||||
// allocated so preload, silence padding, and the write/event lockstep all match it exactly. The channel is
|
||||
// in the READY state here because start_i2s_driver() initialized it before this task was created.
|
||||
size_t dma_buffer_bytes;
|
||||
i2s_chan_info_t chan_info;
|
||||
if (i2s_channel_get_info(this->tx_handle_, &chan_info) == ESP_OK && chan_info.total_dma_buf_size > 0) {
|
||||
// total_dma_buf_size spans all DMA_BUFFERS_COUNT descriptors and is an exact multiple of the count.
|
||||
dma_buffer_bytes = chan_info.total_dma_buf_size / DMA_BUFFERS_COUNT;
|
||||
} else {
|
||||
// Should not happen for a READY channel; fall back to the requested size.
|
||||
dma_buffer_bytes = this->current_stream_info_.frames_to_bytes(dma_buffer_frames(this->current_stream_info_));
|
||||
}
|
||||
const uint32_t frames_per_dma_buffer = this->current_stream_info_.bytes_to_frames(dma_buffer_bytes);
|
||||
|
||||
bool successful_setup = false;
|
||||
|
||||
@@ -308,12 +341,24 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver(audio::AudioStreamInfo &audio_stream
|
||||
return ESP_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
#ifdef USE_ESP32_VARIANT_ESP32
|
||||
// The original ESP32 I2S peripheral stores each sample in a whole number of 16-bit words (a 24-bit sample
|
||||
// occupies 4 bytes in the DMA buffer, an 8-bit sample 2 bytes), but ESPHome's audio pipeline packs samples
|
||||
// tightly (3 bytes for 24-bit, 1 for 8-bit). The two layouts only line up when the bit depth is a multiple
|
||||
// of 16, so reject anything else rather than emit corrupted audio.
|
||||
if (audio_stream_info.get_bits_per_sample() % 16 != 0) {
|
||||
ESP_LOGE(TAG, "ESP32 supports only 16- or 32-bit audio, got %u-bit",
|
||||
(unsigned) audio_stream_info.get_bits_per_sample());
|
||||
return ESP_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
#endif // USE_ESP32_VARIANT_ESP32
|
||||
|
||||
if (!this->parent_->try_lock()) {
|
||||
ESP_LOGE(TAG, "Parent bus is busy");
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
uint32_t dma_buffer_length = audio_stream_info.ms_to_frames(DMA_BUFFER_DURATION_MS);
|
||||
uint32_t dma_buffer_length = dma_buffer_frames(audio_stream_info);
|
||||
|
||||
i2s_role_t i2s_role = this->i2s_role_;
|
||||
i2s_clock_src_t clk_src = I2S_CLK_SRC_DEFAULT;
|
||||
|
||||
Reference in New Issue
Block a user