mirror of
https://github.com/esphome/esphome.git
synced 2026-06-24 14:19:03 +00:00
[audio] Use RingBufferAudioSource for resampling (#16560)
This commit is contained in:
@@ -12,16 +12,17 @@ static const uint32_t READ_WRITE_TIMEOUT_MS = 20;
|
||||
|
||||
AudioResampler::AudioResampler(size_t input_buffer_size, size_t output_buffer_size)
|
||||
: input_buffer_size_(input_buffer_size), output_buffer_size_(output_buffer_size) {
|
||||
this->input_transfer_buffer_ = AudioSourceTransferBuffer::create(input_buffer_size);
|
||||
this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
|
||||
}
|
||||
|
||||
esp_err_t AudioResampler::add_source(std::weak_ptr<ring_buffer::RingBuffer> &input_ring_buffer) {
|
||||
if (this->input_transfer_buffer_ != nullptr) {
|
||||
this->input_transfer_buffer_->set_source(input_ring_buffer);
|
||||
return ESP_OK;
|
||||
// The zero-copy RingBufferAudioSource is created lazily on the first resample() call, once both the ring
|
||||
// buffer (stored here) and the input stream info (set by start()) are available, in either order.
|
||||
this->source_ring_buffer_ = input_ring_buffer.lock();
|
||||
if (this->source_ring_buffer_ == nullptr) {
|
||||
return ESP_ERR_INVALID_STATE;
|
||||
}
|
||||
return ESP_ERR_NO_MEM;
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t AudioResampler::add_sink(std::weak_ptr<ring_buffer::RingBuffer> &output_ring_buffer) {
|
||||
@@ -47,7 +48,7 @@ esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamI
|
||||
this->input_stream_info_ = input_stream_info;
|
||||
this->output_stream_info_ = output_stream_info;
|
||||
|
||||
if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
|
||||
if (this->output_transfer_buffer_ == nullptr) {
|
||||
return ESP_ERR_NO_MEM;
|
||||
}
|
||||
|
||||
@@ -56,6 +57,13 @@ esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamI
|
||||
return ESP_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
// Reject frame sizes that can't be used as the zero-copy source's alignment up front, where the caller checks
|
||||
// the return code. The lazy create() in resample() keeps its own guard since it runs before the uint8_t cast.
|
||||
const size_t bytes_per_frame = this->input_stream_info_.frames_to_bytes(1);
|
||||
if ((bytes_per_frame == 0) || (bytes_per_frame > RingBufferAudioSource::MAX_ALIGNMENT_BYTES)) {
|
||||
return ESP_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if ((input_stream_info.get_sample_rate() != output_stream_info.get_sample_rate()) ||
|
||||
(input_stream_info.get_bits_per_sample() != output_stream_info.get_bits_per_sample())) {
|
||||
this->resampler_ = make_unique<esp_audio_libs::resampler::Resampler>(
|
||||
@@ -87,8 +95,27 @@ esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamI
|
||||
}
|
||||
|
||||
AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_differential) {
|
||||
if (this->audio_source_ == nullptr) {
|
||||
// Lazily create the zero-copy source on first use. Frame-aligned reads ensure multi-channel frames are
|
||||
// never split across the ring buffer's wrap boundary.
|
||||
const size_t bytes_per_frame = this->input_stream_info_.frames_to_bytes(1);
|
||||
if ((bytes_per_frame == 0) || (bytes_per_frame > RingBufferAudioSource::MAX_ALIGNMENT_BYTES)) {
|
||||
// Stream info is unset or the frame is too large to use as an alignment; the uint8_t cast below would
|
||||
// truncate it and could yield a source that tears frames.
|
||||
return AudioResamplerState::FAILED;
|
||||
}
|
||||
// Pass the shared_ptr by copy so a failed create() leaves source_ring_buffer_ intact; release our
|
||||
// reference only after the source has taken ownership.
|
||||
this->audio_source_ = RingBufferAudioSource::create(this->source_ring_buffer_, this->input_buffer_size_,
|
||||
static_cast<uint8_t>(bytes_per_frame));
|
||||
if (this->audio_source_ == nullptr) {
|
||||
return AudioResamplerState::FAILED;
|
||||
}
|
||||
this->source_ring_buffer_.reset();
|
||||
}
|
||||
|
||||
if (stop_gracefully) {
|
||||
if (!this->input_transfer_buffer_->has_buffered_data() && (this->output_transfer_buffer_->available() == 0)) {
|
||||
if (!this->audio_source_->has_buffered_data() && (this->output_transfer_buffer_->available() == 0)) {
|
||||
return AudioResamplerState::FINISHED;
|
||||
}
|
||||
}
|
||||
@@ -102,9 +129,11 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_d
|
||||
delay(READ_WRITE_TIMEOUT_MS);
|
||||
}
|
||||
|
||||
this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
|
||||
// Expose a chunk of the ring buffer's internal storage. pre_shift is ignored by RingBufferAudioSource
|
||||
// (there is no intermediate transfer buffer to compact).
|
||||
this->audio_source_->fill(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false);
|
||||
|
||||
if (this->input_transfer_buffer_->available() == 0) {
|
||||
if (this->audio_source_->available() == 0) {
|
||||
// No samples available to process
|
||||
return AudioResamplerState::RESAMPLING;
|
||||
}
|
||||
@@ -112,17 +141,17 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_d
|
||||
const size_t bytes_free = this->output_transfer_buffer_->free();
|
||||
const uint32_t frames_free = this->output_stream_info_.bytes_to_frames(bytes_free);
|
||||
|
||||
const size_t bytes_available = this->input_transfer_buffer_->available();
|
||||
const size_t bytes_available = this->audio_source_->available();
|
||||
const uint32_t frames_available = this->input_stream_info_.bytes_to_frames(bytes_available);
|
||||
|
||||
if ((this->input_stream_info_.get_sample_rate() != this->output_stream_info_.get_sample_rate()) ||
|
||||
(this->input_stream_info_.get_bits_per_sample() != this->output_stream_info_.get_bits_per_sample())) {
|
||||
// Adjust gain by -3 dB to avoid clipping due to the resampling process
|
||||
esp_audio_libs::resampler::ResamplerResults results =
|
||||
this->resampler_->resample(this->input_transfer_buffer_->get_buffer_start(),
|
||||
this->output_transfer_buffer_->get_buffer_end(), frames_available, frames_free, -3);
|
||||
this->resampler_->resample(this->audio_source_->data(), this->output_transfer_buffer_->get_buffer_end(),
|
||||
frames_available, frames_free, -3);
|
||||
|
||||
this->input_transfer_buffer_->decrease_buffer_length(this->input_stream_info_.frames_to_bytes(results.frames_used));
|
||||
this->audio_source_->consume(this->input_stream_info_.frames_to_bytes(results.frames_used));
|
||||
this->output_transfer_buffer_->increase_buffer_length(
|
||||
this->output_stream_info_.frames_to_bytes(results.frames_generated));
|
||||
|
||||
@@ -146,10 +175,10 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_d
|
||||
const size_t bytes_to_transfer = std::min(this->output_stream_info_.frames_to_bytes(frames_free),
|
||||
this->input_stream_info_.frames_to_bytes(frames_available));
|
||||
|
||||
std::memcpy((void *) this->output_transfer_buffer_->get_buffer_end(),
|
||||
(void *) this->input_transfer_buffer_->get_buffer_start(), bytes_to_transfer);
|
||||
std::memcpy((void *) this->output_transfer_buffer_->get_buffer_end(), (const void *) this->audio_source_->data(),
|
||||
bytes_to_transfer);
|
||||
|
||||
this->input_transfer_buffer_->decrease_buffer_length(bytes_to_transfer);
|
||||
this->audio_source_->consume(bytes_to_transfer);
|
||||
this->output_transfer_buffer_->increase_buffer_length(bytes_to_transfer);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ namespace esphome::audio {
|
||||
enum class AudioResamplerState : uint8_t {
|
||||
RESAMPLING, // More data is available to resample
|
||||
FINISHED, // All file data has been resampled and transferred
|
||||
FAILED, // Unused state included for consistency among Audio classes
|
||||
FAILED, // Failed to allocate the audio source
|
||||
};
|
||||
|
||||
class AudioResampler {
|
||||
@@ -32,14 +32,16 @@ class AudioResampler {
|
||||
* component). Also supports converting bits per sample.
|
||||
*/
|
||||
public:
|
||||
/// @brief Allocates the input and output transfer buffers
|
||||
/// @param input_buffer_size Size of the input transfer buffer in bytes.
|
||||
/// @brief Allocates the output transfer buffer. The input source is created later in resample().
|
||||
/// @param input_buffer_size Max bytes exposed per fill() call on the zero-copy input source.
|
||||
/// @param output_buffer_size Size of the output transfer buffer in bytes.
|
||||
AudioResampler(size_t input_buffer_size, size_t output_buffer_size);
|
||||
|
||||
/// @brief Adds a source ring buffer for audio data. Takes ownership of the ring buffer in a shared_ptr.
|
||||
/// @param input_ring_buffer weak_ptr of a shared_ptr of the sink ring buffer to transfer ownership
|
||||
/// @return ESP_OK if successsful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
|
||||
/// @brief Sets the ring buffer the audio is read from and takes shared ownership of it. The zero-copy
|
||||
/// RingBufferAudioSource that reads directly from its internal storage is created lazily on the first
|
||||
/// resample() call, so add_source() and start() may be called in any order.
|
||||
/// @param input_ring_buffer weak_ptr of a shared_ptr of the source ring buffer to transfer ownership
|
||||
/// @return ESP_OK if successful, ESP_ERR_INVALID_STATE if the ring buffer is no longer alive
|
||||
esp_err_t add_source(std::weak_ptr<ring_buffer::RingBuffer> &input_ring_buffer);
|
||||
|
||||
/// @brief Adds a sink ring buffer for resampled audio. Takes ownership of the ring buffer in a shared_ptr.
|
||||
@@ -78,7 +80,8 @@ class AudioResampler {
|
||||
void set_pause_output_state(bool pause_state) { this->pause_output_ = pause_state; }
|
||||
|
||||
protected:
|
||||
std::unique_ptr<AudioSourceTransferBuffer> input_transfer_buffer_;
|
||||
std::shared_ptr<ring_buffer::RingBuffer> source_ring_buffer_;
|
||||
std::unique_ptr<RingBufferAudioSource> audio_source_;
|
||||
std::unique_ptr<AudioSinkTransferBuffer> output_transfer_buffer_;
|
||||
|
||||
size_t input_buffer_size_;
|
||||
|
||||
Reference in New Issue
Block a user