[audio] Use the microMP3 library instead of esp-audio-libs (#16236)

This commit is contained in:
Kevin Ahrendt
2026-05-04 18:41:50 -04:00
committed by GitHub
parent 7c2a63bf82
commit 857e529803
4 changed files with 61 additions and 63 deletions

View File

@@ -395,6 +395,7 @@ async def to_code(config):
)
if data.mp3_support:
cg.add_define("USE_AUDIO_MP3_SUPPORT")
add_idf_component(name="esphome/micro-mp3", ref="0.2.0")
_emit_memory_pair(
data.mp3.buffer_memory,
"CONFIG_MP3_DECODER_PREFER_PSRAM",

View File

@@ -20,14 +20,6 @@ AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
}
AudioDecoder::~AudioDecoder() {
#ifdef USE_AUDIO_MP3_SUPPORT
if (this->audio_file_type_ == AudioFileType::MP3) {
esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
}
#endif
}
esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
auto source = AudioSourceTransferBuffer::create(this->input_buffer_size_);
if (source == nullptr) {
@@ -92,13 +84,10 @@ esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
case AudioFileType::MP3:
this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
// MP3 always has 1152 samples per chunk
this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
// Always reallocate the output transfer buffer to the smallest necessary size
this->output_transfer_buffer_->reallocate(this->free_buffer_required_);
this->mp3_decoder_ = make_unique<micro_mp3::Mp3Decoder>();
this->free_buffer_required_ =
this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header
this->decoder_buffers_internally_ = true;
break;
#endif
#ifdef USE_AUDIO_OPUS_SUPPORT
@@ -312,51 +301,56 @@ FileDecoderState AudioDecoder::decode_flac_() {
#ifdef USE_AUDIO_MP3_SUPPORT
FileDecoderState AudioDecoder::decode_mp3_() {
// Look for the next sync word
int buffer_length = (int) this->input_buffer_->available();
int32_t offset = esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_buffer_->data(), buffer_length);
// microMP3's samples_decoded value is samples per channel; i.e., what ESPHome typically calls an audio frame.
// microMP3 uses the term frame to refer to an MP3 frame: an encoded packet that contains multiple audio frames.
size_t bytes_consumed = 0;
size_t samples_decoded = 0;
if (offset < 0) {
// New data may have the sync word
this->input_buffer_->consume(buffer_length);
// microMP3 buffers internally: it consumes from our input buffer at its own pace, emits MP3_STREAM_INFO_READY once
// the first frame header is parsed, and only then produces PCM. It handles sync-word search and ID3v2 tag skipping.
micro_mp3::Mp3Result result = this->mp3_decoder_->decode(
this->input_buffer_->data(), this->input_buffer_->available(), this->output_transfer_buffer_->get_buffer_end(),
this->output_transfer_buffer_->free(), bytes_consumed, samples_decoded);
this->input_buffer_->consume(bytes_consumed);
if (result == micro_mp3::MP3_OK) {
if (samples_decoded > 0 && this->audio_stream_info_.has_value()) {
this->output_transfer_buffer_->increase_buffer_length(
this->audio_stream_info_.value().frames_to_bytes(samples_decoded));
}
} else if (result == micro_mp3::MP3_STREAM_INFO_READY) {
// First successful header parse: capture stream info and resize the output buffer to fit one full frame.
// microMP3 always outputs 16-bit PCM.
this->audio_stream_info_ =
audio::AudioStreamInfo(16, this->mp3_decoder_->get_channels(), this->mp3_decoder_->get_sample_rate());
this->free_buffer_required_ =
this->mp3_decoder_->get_samples_per_frame() * this->mp3_decoder_->get_channels() * sizeof(int16_t);
if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
return FileDecoderState::FAILED;
}
} else if (result == micro_mp3::MP3_NEED_MORE_DATA) {
return FileDecoderState::MORE_TO_PROCESS;
} else if (result == micro_mp3::MP3_OUTPUT_BUFFER_TOO_SMALL) {
// Reallocate to decode the frame on the next call
if (this->mp3_decoder_->get_channels() > 0) {
this->free_buffer_required_ =
this->mp3_decoder_->get_samples_per_frame() * this->mp3_decoder_->get_channels() * sizeof(int16_t);
} else {
// Fallback to worst-case size if channel info isn't available
this->free_buffer_required_ = this->mp3_decoder_->get_min_output_buffer_bytes();
}
if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
return FileDecoderState::FAILED;
}
} else if (result == micro_mp3::MP3_DECODE_ERROR) {
// Corrupt frame skipped; recoverable, retry on next call
ESP_LOGW(TAG, "MP3 decoder skipped a corrupt frame");
return FileDecoderState::POTENTIALLY_FAILED;
}
// Advance read pointer to match the offset for the syncword
this->input_buffer_->consume(offset);
const uint8_t *buffer_start = this->input_buffer_->data();
buffer_length = (int) this->input_buffer_->available();
int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
(int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
size_t consumed = this->input_buffer_->available() - buffer_length;
this->input_buffer_->consume(consumed);
if (err) {
switch (err) {
case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
[[fallthrough]];
case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
return FileDecoderState::FAILED;
break;
default:
// Most errors are recoverable by moving on to the next frame, so mark as potentially failed
return FileDecoderState::POTENTIALLY_FAILED;
break;
}
} else {
esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
if (mp3_frame_info.outputSamps > 0) {
int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
if (!this->audio_stream_info_.has_value()) {
this->audio_stream_info_ =
audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
}
}
// MP3_ALLOCATION_FAILED, MP3_INPUT_INVALID, or any future error -- not recoverable
ESP_LOGE(TAG, "MP3 decoder failed: %d", static_cast<int>(result));
return FileDecoderState::FAILED;
}
return FileDecoderState::MORE_TO_PROCESS;

View File

@@ -16,9 +16,6 @@
#include "esp_err.h"
// esp-audio-libs
#ifdef USE_AUDIO_MP3_SUPPORT
#include <mp3_decoder.h>
#endif
#include <wav_decoder.h>
// micro-flac
@@ -26,6 +23,11 @@
#include <micro_flac/flac_decoder.h>
#endif
// micro-mp3
#ifdef USE_AUDIO_MP3_SUPPORT
#include <micro_mp3/mp3_decoder.h>
#endif
// micro-opus
#ifdef USE_AUDIO_OPUS_SUPPORT
#include <micro_opus/ogg_opus_decoder.h>
@@ -62,8 +64,7 @@ class AudioDecoder {
/// @param output_buffer_size Size of the output transfer buffer in bytes.
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size);
/// @brief Deallocates the MP3 decoder (the flac, opus, and wav decoders are deallocated automatically)
~AudioDecoder();
~AudioDecoder() = default;
/// @brief Adds a source ring buffer for raw file data. Takes ownership of the ring buffer in a shared_ptr.
/// @param input_ring_buffer weak_ptr of a shared_ptr of the sink ring buffer to transfer ownership
@@ -125,7 +126,7 @@ class AudioDecoder {
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
FileDecoderState decode_mp3_();
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_;
std::unique_ptr<micro_mp3::Mp3Decoder> mp3_decoder_;
#endif
#ifdef USE_AUDIO_OPUS_SUPPORT
FileDecoderState decode_opus_();

View File

@@ -9,6 +9,8 @@ dependencies:
version: 0.2.0
esphome/micro-flac:
version: 0.1.1
esphome/micro-mp3:
version: 0.2.0
esphome/micro-opus:
version: 0.4.0
espressif/esp-dsp: