diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp index e2146de63c..d257dd1d8f 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp +++ b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp @@ -23,35 +23,14 @@ static const char *const TAG = "i2s_audio.spdif"; // 15 buffers x 4ms = 60ms of DMA buffering (same as 4 x 15ms for standard) static constexpr size_t SPDIF_DMA_BUFFERS_COUNT = 15; -// Timeout for flushing pending frames if no callback received. -static constexpr uint32_t SPDIF_FLUSH_TIMEOUT_MS = 20; - // Number of DMA events between upstream callbacks (~16ms = 4 events x 4ms each). // Matches non-SPDIF timing to prevent overwhelming upstream sync algorithms. static constexpr uint32_t SPDIF_DMA_EVENTS_PER_CALLBACK = 4; -// Consider TX stalled only if no DMA callbacks have arrived for this long. -// Zero-block non-blocking writes alone are not sufficient (they can happen when DMA is simply full). -static constexpr uint32_t SPDIF_STALL_NO_DMA_MS = 80; - -// Fallback stall detector: force recovery if silence writes make no forward progress for too long, -// even if occasional DMA callbacks are still observed. -static constexpr uint32_t SPDIF_STALL_ZERO_PROGRESS_MS = 1000; - -// Minimum spacing between re-prime attempts to avoid churn. -static constexpr uint32_t SPDIF_REPRIME_COOLDOWN_MS = 500; - -// Small waits used in SPDIF mode to keep DMA fed during rapid pipeline churn. -static constexpr uint32_t SPDIF_EMPTY_READ_DELAY_MS = 1; -static constexpr uint32_t SPDIF_SILENCE_LOOP_DELAY_MS = 1; +// Brief retry wait used by play() to catch short free-space windows during rapid track transitions. 
static constexpr uint32_t SPDIF_PLAY_RETRY_WAIT_MS = 5; -static constexpr size_t SPDIF_I2S_EVENT_QUEUE_COUNT = SPDIF_DMA_BUFFERS_COUNT + 1; - -// Static silence buffer for SPDIF continuous mode -// 192 samples * 2 channels * 2 bytes per sample = 768 bytes -// Stored in flash (.rodata section) to avoid stack/heap usage -static const int16_t SPDIF_SILENCE_BUFFER[SPDIF_BLOCK_SAMPLES * 2] = {0}; +static constexpr size_t SPDIF_I2S_EVENT_QUEUE_COUNT = 2 * SPDIF_DMA_BUFFERS_COUNT; // Static callback functions for SPDIF encoder (avoids std::function overhead) static esp_err_t spdif_preload_cb(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait) { @@ -59,7 +38,7 @@ static esp_err_t spdif_preload_cb(void *user_ctx, uint32_t *data, size_t size, T size_t bytes_written = 0; esp_err_t err = i2s_channel_preload_data(speaker->get_tx_handle(), data, size, &bytes_written); if (err != ESP_OK || bytes_written != size) { - ESP_LOGW(TAG, "Preload failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size); + ESP_LOGV(TAG, "Preload failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size); return (err != ESP_OK) ? err : ESP_ERR_NO_MEM; } return ESP_OK; @@ -69,9 +48,8 @@ static esp_err_t spdif_write_cb(void *user_ctx, uint32_t *data, size_t size, Tic auto *speaker = static_cast(user_ctx); size_t bytes_written = 0; esp_err_t err = i2s_channel_write(speaker->get_tx_handle(), data, size, &bytes_written, ticks_to_wait); - // ESP_ERR_TIMEOUT is expected under DMA backpressure in SPDIF mode. 
- if (err != ESP_OK && err != ESP_ERR_TIMEOUT) { - ESP_LOGW(TAG, "I2S write failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size); + if (err != ESP_OK) { + ESP_LOGV(TAG, "I2S write failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size); } return err; } @@ -157,6 +135,9 @@ void I2SAudioSpeakerSPDIF::run_speaker_task() { this->spdif_encoder_->reset(); } + // Reset lockstep records queue so it starts paired with the (also-reset) i2s_event_queue_. + xQueueReset(this->write_records_queue_); + const uint32_t dma_buffers_duration_ms = DMA_BUFFER_DURATION_MS * SPDIF_DMA_BUFFERS_COUNT; // Ensure ring buffer duration is at least the duration of all DMA buffers const uint32_t ring_buffer_duration = std::max(dma_buffers_duration_ms, this->buffer_duration_ms_); @@ -188,19 +169,16 @@ void I2SAudioSpeakerSPDIF::run_speaker_task() { // Preload DMA buffers with SPDIF-encoded silence before enabling the channel. // This ensures the first data transmitted is valid SPDIF (not raw zeros from // auto_clear) and prevents phantom DMA events before real audio is available. - // Track how many buffers were preloaded so the DMA event loop can skip - // frame accounting until the preloaded silence has fully drained. - uint32_t preload_buffers_remaining = 0; + // Each preloaded block pushes a 0-real-frame record so that the corresponding + // on_sent events drain in lockstep without crediting any audio frames. 
this->spdif_encoder_->set_preload_mode(true); for (size_t i = 0; i < SPDIF_DMA_BUFFERS_COUNT; i++) { - uint32_t preload_blocks = 0; - esp_err_t preload_err = this->spdif_encoder_->write(reinterpret_cast(SPDIF_SILENCE_BUFFER), - sizeof(SPDIF_SILENCE_BUFFER), - pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &preload_blocks); - if (preload_err != ESP_OK || preload_blocks == 0) { - break; // DMA buffers full or error + esp_err_t preload_err = this->spdif_encoder_->flush_with_silence(pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS)); + if (preload_err != ESP_OK) { + break; // DMA preload buffer full or error } - preload_buffers_remaining += preload_blocks; + const uint32_t silence_record = 0; + xQueueSendToBack(this->write_records_queue_, &silence_record, 0); } this->spdif_encoder_->set_preload_mode(false); this->spdif_encoder_->reset(); // Clean encoder state for the main loop @@ -211,299 +189,193 @@ void I2SAudioSpeakerSPDIF::run_speaker_task() { i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this); i2s_channel_enable(this->tx_handle_); - bool stop_gracefully = false; - bool tx_dma_underflow = true; + // Always-fill model: each iteration produces exactly one SPDIF block (= one DMA buffer). + // We drain real PCM up to one block from the ring buffer and silence-pad any remainder. + // Blocking writes pace the loop at the DMA consumption rate. This mirrors the standard + // I2S speaker pattern (PR #16317): fill what you can, then silence-pad whatever is still + // missing to complete the DMA buffer. + const uint32_t block_duration_us = this->current_stream_info_.frames_to_microseconds(SPDIF_BLOCK_SAMPLES); + // Sized to absorb the worst case where every DMA buffer is full when we issue the write. + const TickType_t write_timeout_ticks = + pdMS_TO_TICKS(((block_duration_us * (SPDIF_DMA_BUFFERS_COUNT + 1)) + 999) / 1000); + // Brief read budget when the ring buffer is empty (~half a block). 
+ const TickType_t read_timeout_ticks = pdMS_TO_TICKS(((block_duration_us / 2) + 999) / 1000); - uint32_t frames_written = 0; - - // SPDIF Continuous Silence Mode + Callback Decimation - // - // Key principles: - // 1. NEVER stop the I2S channel - always output a valid SPDIF stream - // 2. When no audio data, output silence-encoded SPDIF blocks (not zeros!) - // 3. Fire callbacks every 4 DMA events (~16ms), matching non-SPDIF timing - // - // This eliminates gaps that cause SPDIF receivers to re-sync, and reduces - // callback rate to prevent overwhelming upstream sync algorithms. - const uint32_t spdif_callback_threshold = this->current_stream_info_.ms_to_frames(DMA_BUFFER_DURATION_MS); + // SPDIF Callback Decimation: fire every 4th DMA event (~16ms), matching non-SPDIF timing. uint32_t spdif_pending_frames = 0; int64_t spdif_pending_timestamp = 0; - uint32_t spdif_last_callback_time = millis(); - // Count DMA events for decimation uint32_t spdif_dma_event_count = 0; - uint32_t spdif_last_dma_event_time = millis(); - // Detect a stalled DMA path (many silence write attempts with zero accepted blocks). - uint32_t spdif_zero_block_streak = 0; - uint32_t spdif_last_block_progress_time = millis(); - uint32_t spdif_last_reprime_time = 0; xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_RUNNING); // SPDIF continuous mode: loop runs indefinitely, outputting silence when no audio data - // to keep the receiver synced. Exits only via break (stream info change or silence timeout). + // to keep the receiver synced. Exits only via break (stream info change, silence timeout, + // lockstep desync, dropped event, or partial-write failure). while (true) { uint32_t event_group_bits = xEventGroupGetBits(this->event_group_); if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) { xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP); - // In SPDIF continuous mode, don't tear down or expose STOPPED here. 
- // Keep the task alive and transition to silence output. + // The ISR pairs COMMAND_STOP with ERR_DROPPED_EVENT when it has to discard a completion + // event; that desyncs the lockstep queues permanently and the only safe recovery is a full + // task restart. + if (event_group_bits & SpeakerEventGroupBits::ERR_DROPPED_EVENT) { + ESP_LOGV(TAG, "Exiting: ISR dropped event, restarting to recover lockstep"); + break; + } + // User-initiated stop. In SPDIF continuous mode, transition to silence output rather + // than tearing the task down. this->spdif_silence_start_ = millis(); ESP_LOGV(TAG, "COMMAND_STOP received, continuing in silence mode"); } if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) { + // SPDIF continuous mode never tears the channel down on graceful stop. Clear the flag and + // let the audio simply drain through the always-fill loop into the silence-timeout path. xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY); - stop_gracefully = true; } if (this->audio_stream_info_ != this->current_stream_info_) { - // Audio stream info changed, stop the speaker task so it will restart with the proper settings. ESP_LOGV(TAG, "Exiting: stream info changed"); break; } + // Drain ISR completion events, popping a matching record for each. int64_t write_timestamp; + bool lockstep_broken = false; while (xQueueReceive(this->i2s_event_queue_, &write_timestamp, 0)) { - spdif_last_dma_event_time = millis(); - - // Skip frame accounting for preloaded silence buffers still draining. - // These DMA events correspond to silence that was preloaded before the - // channel was enabled, not real audio written by the task. - if (preload_buffers_remaining > 0) { - preload_buffers_remaining--; - continue; + // Lockstep: pop the matching record (real audio frames packed into this DMA block). + // Records are pushed by the task right after each successful block commit, so the FIFO + // order matches DMA completion order. 
Empty records queue here means lockstep broke. + uint32_t real_frames = 0; + if (xQueueReceive(this->write_records_queue_, &real_frames, 0) != pdTRUE) { + ESP_LOGV(TAG, "Event without matching write record"); + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_LOCKSTEP_DESYNC); + lockstep_broken = true; + break; } - // Receives timing events from the I2S on_sent callback. If actual audio data was sent in this event, it passes - // on the timing info via the audio_output_callback. - uint32_t frames_sent = frames_to_fill_single_dma_buffer; - if (frames_to_fill_single_dma_buffer > frames_written) { - tx_dma_underflow = true; - frames_sent = frames_written; - const uint32_t frames_zeroed = frames_to_fill_single_dma_buffer - frames_written; + // Per-block timestamp adjustment: shift back by the silence-padding portion of the block + // so the reported timestamp reflects when the last real sample left the wire. + uint32_t frames_sent = real_frames; + if (real_frames < SPDIF_BLOCK_SAMPLES) { + const uint32_t frames_zeroed = SPDIF_BLOCK_SAMPLES - real_frames; write_timestamp -= this->current_stream_info_.frames_to_microseconds(frames_zeroed); - } else { - tx_dma_underflow = false; } - frames_written -= frames_sent; - // SPDIF Callback Decimation: fire every 4th DMA event (~16ms) - // This matches non-SPDIF timing and prevents overwhelming upstream. - if (spdif_callback_threshold > 0) { - spdif_dma_event_count++; + spdif_dma_event_count++; + // Accumulate frames; keep the latest timestamp so the callback reports when the last + // sample left the wire, not the first. + if (frames_sent > 0) { + spdif_pending_timestamp = write_timestamp; + spdif_pending_frames += frames_sent; + } - // Accumulate frames; always keep the latest timestamp so the - // callback reports when the last sample left the wire, not the first. 
- if (frames_sent > 0) { - spdif_pending_timestamp = write_timestamp; - spdif_pending_frames += frames_sent; - } - - // Fire callback every 4 DMA events, or on timeout if we have pending frames - bool decimation_reached = (spdif_dma_event_count >= SPDIF_DMA_EVENTS_PER_CALLBACK); - bool timeout_flush = - (spdif_pending_frames > 0) && ((millis() - spdif_last_callback_time) >= SPDIF_FLUSH_TIMEOUT_MS); - - if (decimation_reached || timeout_flush) { - if (spdif_pending_frames > 0) { - this->audio_output_callback_(spdif_pending_frames, spdif_pending_timestamp); - spdif_pending_frames = 0; - spdif_last_callback_time = millis(); - } - spdif_dma_event_count = 0; // Reset decimation counter + bool decimation_reached = (spdif_dma_event_count >= SPDIF_DMA_EVENTS_PER_CALLBACK); + // Partial blocks mark an end-of-stream boundary (silence-padded tail). Fire immediately + // so the back-shifted timestamp isn't overwritten by a later full audio block landing + // in the same decimation window. + bool partial_flush = (real_frames > 0 && real_frames < SPDIF_BLOCK_SAMPLES); + + if (decimation_reached || partial_flush) { + if (spdif_pending_frames > 0) { + this->audio_output_callback_(spdif_pending_frames, spdif_pending_timestamp); + spdif_pending_frames = 0; } + spdif_dma_event_count = 0; } } - - if (this->pause_state_) { - // Pause state is accessed atomically, so thread safe - // Delay so the task yields, then skip transferring audio data - vTaskDelay(pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS)); - continue; + if (lockstep_broken) { + ESP_LOGV(TAG, "Exiting: lockstep desync, restarting task"); + break; } - // Wait half the duration of the data already written to the DMA buffers for new audio data - // The millisecond helper modifies the frames_written variable, so use the microsecond helper and divide by 1000 - uint32_t read_delay = (this->current_stream_info_.frames_to_microseconds(frames_written) / 1000) / 2; + // Always-fill: produce exactly one SPDIF block this iteration. 
The blocking encoder write + // paces the task at the DMA consumption rate. + uint32_t real_frames_in_block = 0; + bool block_committed = false; + bool partial_write_failure = false; - // In SPDIF mode, if transfer buffer is empty (we're pumping silence), use a very short timeout. - // This ensures we can pump silence fast enough to keep the DMA fed (~250 blocks/sec needed). - // Otherwise the long timeout based on frames_written causes DMA to run dry. - if (transfer_buffer->available() == 0) { - read_delay = SPDIF_EMPTY_READ_DELAY_MS; - } - - size_t bytes_read = transfer_buffer->transfer_data_from_source(pdMS_TO_TICKS(read_delay)); - uint8_t *new_data = transfer_buffer->get_buffer_end() - bytes_read; - - if (bytes_read > 0) { - this->apply_software_volume_(new_data, bytes_read); - this->swap_esp32_mono_samples_(new_data, bytes_read); - } - - if (transfer_buffer->available() == 0) { - // SPDIF Continuous Silence Mode: always output valid SPDIF stream - // When no audio data, write silence-encoded blocks to keep receiver happy - if (this->spdif_encoder_ != nullptr) { - // "Graceful stop" means "drain buffered audio, then stop." In SPDIF - // continuous mode we never actually stop, so once audio is drained - // (we're here), reset the flag to re-enable silence writing and stall - // recovery. Without this, stop_gracefully stays true forever and - // blocks silence output, causing DMA to degrade on auto_clear zeros. - stop_gracefully = false; - - // Track when we entered silence mode - if (this->spdif_silence_start_ == 0) { - this->spdif_silence_start_ = millis(); + if (!this->pause_state_) { + while (real_frames_in_block < SPDIF_BLOCK_SAMPLES) { + if (transfer_buffer->available() == 0) { + size_t bytes_read = transfer_buffer->transfer_data_from_source(read_timeout_ticks); + if (bytes_read == 0) { + break; // No upstream data within the read budget; silence-pad the remainder. 
+ } + uint8_t *new_data = transfer_buffer->get_buffer_end() - bytes_read; + this->apply_software_volume_(new_data, bytes_read); + this->swap_esp32_mono_samples_(new_data, bytes_read); } - // If silence persists past the configured timeout, stop the task - // so components expecting timeout semantics can recover. - if (this->timeout_.has_value()) { - const uint32_t silence_duration = millis() - this->spdif_silence_start_; - if (silence_duration >= this->timeout_.value()) { - ESP_LOGV(TAG, "Silence timeout reached (%" PRIu32 "ms) - stopping speaker", silence_duration); - break; - } - } + const uint32_t frames_still_needed = SPDIF_BLOCK_SAMPLES - real_frames_in_block; + const size_t bytes_still_needed = this->current_stream_info_.frames_to_bytes(frames_still_needed); + const size_t bytes_to_feed = std::min(transfer_buffer->available(), bytes_still_needed); - // First flush any partial block with silence padding (non-blocking to avoid getting stuck). - // IMPORTANT: Credit any partial block frames to frames_written so the audio_output_callback_ - // fires for them. Without this, pending_playback_frames_ in the mixer's SourceSpeaker never - // reaches 0 when a stream ends on a non-192-frame boundary, permanently blocking teardown. - if (this->spdif_encoder_->has_pending_data()) { - uint32_t partial_frames = this->spdif_encoder_->get_pending_frames(); - // Use a tiny timeout to allow DMA queue progress without stalling the task. - esp_err_t flush_err = this->spdif_encoder_->flush_with_silence(pdMS_TO_TICKS(1)); - if (flush_err == ESP_OK && partial_frames > 0) { - frames_written += partial_frames; - } - } - - // CRITICAL: In SPDIF continuous mode, ALWAYS write silence when no audio data. - // We don't check tx_dma_underflow because: - // 1. When DMA runs empty, callbacks stop, so tx_dma_underflow doesn't update - // 2. The non-blocking write handles "DMA full" gracefully (just doesn't write) - // 3. 
We need continuous output to prevent receiver from losing sync - if (!stop_gracefully) { - uint32_t silence_blocks = 0; - esp_err_t write_err = this->spdif_encoder_->write( - reinterpret_cast(SPDIF_SILENCE_BUFFER), sizeof(SPDIF_SILENCE_BUFFER), pdMS_TO_TICKS(1), - &silence_blocks); // Non-blocking - // Don't count silence as frames_written - it's not real audio - - // Recovery path for a stalled SPDIF TX channel: - // if silence writes repeatedly produce zero blocks AND DMA callbacks have stopped, - // re-prime DMA using preload mode. - const uint32_t ms_since_dma = millis() - spdif_last_dma_event_time; - const bool dma_events_stalled = ms_since_dma >= SPDIF_STALL_NO_DMA_MS; - if (silence_blocks > 0) { - spdif_last_block_progress_time = millis(); - } - const bool long_zero_progress = (millis() - spdif_last_block_progress_time) >= SPDIF_STALL_ZERO_PROGRESS_MS; - if (dma_events_stalled && silence_blocks == 0 && (write_err == ESP_OK || write_err == ESP_ERR_TIMEOUT)) { - spdif_zero_block_streak++; - } else { - spdif_zero_block_streak = 0; - } - - const uint32_t now_ms = millis(); - const bool reprime_cooldown_elapsed = - (spdif_last_reprime_time == 0) || ((now_ms - spdif_last_reprime_time) >= SPDIF_REPRIME_COOLDOWN_MS); - - if ((spdif_zero_block_streak >= 100 || long_zero_progress) && reprime_cooldown_elapsed) { - ESP_LOGV(TAG, "TX appears stalled, attempting DMA re-prime"); - - i2s_channel_disable(this->tx_handle_); - - const i2s_event_callbacks_t null_callbacks = {.on_sent = nullptr}; - i2s_channel_register_event_callback(this->tx_handle_, &null_callbacks, this); - - this->spdif_encoder_->set_preload_mode(true); - uint32_t preload_blocks = 0; - esp_err_t preload_err = this->spdif_encoder_->write( - reinterpret_cast(SPDIF_SILENCE_BUFFER), sizeof(SPDIF_SILENCE_BUFFER), - pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &preload_blocks); - this->spdif_encoder_->set_preload_mode(false); - - xQueueReset(this->i2s_event_queue_); - const i2s_event_callbacks_t callbacks = {.on_sent = 
i2s_on_sent_cb}; - i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this); - i2s_channel_enable(this->tx_handle_); - - if (preload_err == ESP_OK && preload_blocks > 0) { - tx_dma_underflow = false; - preload_buffers_remaining = preload_blocks; - frames_written = 0; // Stale after channel disable/enable cycle - ESP_LOGV(TAG, "DMA re-prime successful (%" PRIu32 " preload blocks)", preload_blocks); - spdif_last_block_progress_time = now_ms; - } else { - ESP_LOGW(TAG, "DMA re-prime failed (%s, blocks=%" PRIu32 ")", esp_err_to_name(preload_err), - preload_blocks); - } - spdif_last_reprime_time = now_ms; - spdif_zero_block_streak = 0; - } - } - } - - if (stop_gracefully && tx_dma_underflow) { - // In SPDIF continuous mode, don't break on graceful stop during silence - // Keep outputting silence until new audio arrives or explicit COMMAND_STOP - // (handled above which transitions to silence mode rather than breaking) - } - - // In SPDIF mode, use a shorter delay to pump silence faster - // We need ~250 blocks/sec to keep DMA fed, so max 4ms per iteration - vTaskDelay(pdMS_TO_TICKS(SPDIF_SILENCE_LOOP_DELAY_MS)); - } else { - // Have audio data to write - size_t bytes_written = 0; - - // Clear silence timer since we have audio data now - if (this->spdif_silence_start_ != 0) { - uint32_t silence_duration = millis() - this->spdif_silence_start_; - if (silence_duration > 100) { - ESP_LOGV(TAG, "Exiting silence mode after %" PRIu32 "ms, have audio data", silence_duration); - } - this->spdif_silence_start_ = 0; - } - - { uint32_t blocks_sent = 0; - size_t pcm_bytes_consumed = 0; - - // Write audio data to encoder (which writes to DMA) - esp_err_t err = - this->spdif_encoder_->write(transfer_buffer->get_buffer_start(), transfer_buffer->available(), - pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &blocks_sent, &pcm_bytes_consumed); - if (err != ESP_OK && err != ESP_ERR_TIMEOUT) { - ESP_LOGW(TAG, "Write failed: %s", esp_err_to_name(err)); + size_t pcm_consumed = 0; + 
esp_err_t err = this->spdif_encoder_->write(transfer_buffer->get_buffer_start(), bytes_to_feed, + write_timeout_ticks, &blocks_sent, &pcm_consumed); + if (err != ESP_OK) { + // A failed (or timed-out) send leaves an unsent block in the encoder's stitch buffer; + // resuming would credit the next iteration's bytes against an old block. Bail and + // let loop() restart the task with a clean encoder. + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_PARTIAL_WRITE); + partial_write_failure = true; + break; } - // Only consume source bytes that were actually accepted by the encoder. - bytes_written = pcm_bytes_consumed; - - // Update frame accounting based on complete blocks sent (192 frames per block) - if (bytes_written > 0) { - frames_written += blocks_sent * SPDIF_BLOCK_SAMPLES; - transfer_buffer->decrease_buffer_length(bytes_written); - // Audio blocks count as DMA progress for the stall detector. - // Without this, a long uninterrupted audio stream makes the - // progress timer stale, triggering a spurious re-prime the - // instant we transition to silence. - spdif_last_block_progress_time = millis(); + if (pcm_consumed > 0) { + transfer_buffer->decrease_buffer_length(pcm_consumed); + real_frames_in_block += this->current_stream_info_.bytes_to_frames(pcm_consumed); + } + if (blocks_sent > 0) { + block_committed = true; + break; } } } - } - // If we reach here, the while loop exited - either via break or condition became false - // In SPDIF mode, loop exit is expected when: - // 1. Timeout reached (user configured timeout) - // 2. 
Stream info changed - // Only warn if timeout is "never" since that should never exit - if (!this->timeout_.has_value()) { - ESP_LOGW(TAG, "Unexpected loop exit; set 'timeout: never' to prevent this"); + + if (partial_write_failure) { + break; + } + + if (!block_committed) { + // Pad whatever real audio we managed to feed (if any) with silence to complete one block, + // or emit a full silence block if the encoder is empty. + esp_err_t err = this->spdif_encoder_->flush_with_silence(write_timeout_ticks); + if (err != ESP_OK) { + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_PARTIAL_WRITE); + break; + } + } + + // One block committed to DMA; push exactly one record carrying its real-audio frame count. + // Failure here means the records queue is full, which violates the lockstep invariant. + if (xQueueSendToBack(this->write_records_queue_, &real_frames_in_block, 0) != pdTRUE) { + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_LOCKSTEP_DESYNC); + break; + } + + // Silence-timeout tracking: all-silence blocks start/advance the timer; any real audio clears it. 
+ if (real_frames_in_block == 0) { + if (this->spdif_silence_start_ == 0) { + this->spdif_silence_start_ = millis(); + } + + if (this->timeout_.has_value()) { + const uint32_t silence_duration = millis() - this->spdif_silence_start_; + if (silence_duration >= this->timeout_.value()) { + ESP_LOGV(TAG, "Silence timeout reached (%" PRIu32 "ms) - stopping speaker", silence_duration); + break; + } + } + } else if (this->spdif_silence_start_ != 0) { + uint32_t silence_duration = millis() - this->spdif_silence_start_; + if (silence_duration > 100) { + ESP_LOGV(TAG, "Exiting silence mode after %" PRIu32 "ms, have audio data", silence_duration); + } + this->spdif_silence_start_ = 0; + } } } diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp index f34839a314..27961050e6 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp @@ -69,6 +69,17 @@ void I2SAudioSpeakerBase::loop() { } if (event_group_bits & SpeakerEventGroupBits::TASK_STOPPING) { ESP_LOGV(TAG, "Stopping"); + // Lockstep-breaking error bits are latched by the task and cleared along with all other bits + // when TASK_STOPPED is processed; log them here, exactly once, as the task winds down. 
+ if (event_group_bits & SpeakerEventGroupBits::ERR_DROPPED_EVENT) { + ESP_LOGE(TAG, "ISR event queue overflow, restarting speaker task to recover timestamp sync"); + } + if (event_group_bits & SpeakerEventGroupBits::ERR_PARTIAL_WRITE) { + ESP_LOGE(TAG, "Partial DMA write broke buffer alignment, restarting speaker task"); + } + if (event_group_bits & SpeakerEventGroupBits::ERR_LOCKSTEP_DESYNC) { + ESP_LOGE(TAG, "Event/record queues desynced, restarting speaker task"); + } xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::TASK_STOPPING); this->state_ = speaker::STATE_STOPPING; } @@ -87,18 +98,11 @@ void I2SAudioSpeakerBase::loop() { this->state_ = speaker::STATE_STOPPED; } - // Log any errors encountered by the task if (event_group_bits & SpeakerEventGroupBits::ERR_ESP_NO_MEM) { ESP_LOGE(TAG, "Not enough memory"); xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); } - // Warn if any playback timestamp events are dropped, which drastically reduces synced playback accuracy - if (event_group_bits & SpeakerEventGroupBits::WARN_DROPPED_EVENT) { - ESP_LOGW(TAG, "Event dropped, synchronized playback accuracy is reduced"); - xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::WARN_DROPPED_EVENT); - } - // Handle the speaker's state switch (this->state_) { case speaker::STATE_STARTING: @@ -271,6 +275,22 @@ esp_err_t I2SAudioSpeakerBase::init_i2s_channel_(const i2s_chan_config_t &chan_c xQueueReset(this->i2s_event_queue_); } + // Lockstep records queue. One record per in-flight DMA buffer; sized to match the I2S event queue + // so a fully-saturated DMA pipeline cannot overflow either side before drain. 
+ if (this->write_records_queue_ == nullptr) { + this->write_records_queue_ = xQueueCreate(event_queue_size, sizeof(uint32_t)); + } else { + xQueueReset(this->write_records_queue_); + } + + if (this->i2s_event_queue_ == nullptr || this->write_records_queue_ == nullptr) { + ESP_LOGE(TAG, "Failed to allocate I2S event queue(s)"); + i2s_del_channel(this->tx_handle_); + this->tx_handle_ = nullptr; + this->parent_->unlock(); + return ESP_ERR_NO_MEM; + } + return ESP_OK; } @@ -293,10 +313,16 @@ bool IRAM_ATTR I2SAudioSpeakerBase::i2s_on_sent_cb(i2s_chan_handle_t handle, i2s I2SAudioSpeakerBase *this_speaker = (I2SAudioSpeakerBase *) user_ctx; if (xQueueIsQueueFullFromISR(this_speaker->i2s_event_queue_)) { - // Queue is full, so discard the oldest event and set the warning flag to inform the user + // Queue is full, so discard the oldest event. Once we drop a completion event, ``i2s_event_queue_`` + // and any per-buffer record queue maintained by the task are permanently desynced, so the task + // must restart to recover. Set both ERR_DROPPED_EVENT (so loop() can log it) and COMMAND_STOP + // (so the task bails immediately, closing the race where loop() could clear the error bit + // before the task observes it). 
int64_t dummy; xQueueReceiveFromISR(this_speaker->i2s_event_queue_, &dummy, &need_yield1); - xEventGroupSetBitsFromISR(this_speaker->event_group_, SpeakerEventGroupBits::WARN_DROPPED_EVENT, &need_yield2); + xEventGroupSetBitsFromISR(this_speaker->event_group_, + SpeakerEventGroupBits::ERR_DROPPED_EVENT | SpeakerEventGroupBits::COMMAND_STOP, + &need_yield2); } xQueueSendToBackFromISR(this_speaker->i2s_event_queue_, &now, &need_yield3); diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h index bfde455c75..c57af2775b 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h @@ -35,7 +35,11 @@ enum SpeakerEventGroupBits : uint32_t { ERR_ESP_NO_MEM = (1 << 19), - WARN_DROPPED_EVENT = (1 << 20), + ERR_DROPPED_EVENT = (1 << 20), // ISR overflowed the event queue, dropping a completion event + ERR_PARTIAL_WRITE = (1 << 21), // a DMA write returned fewer bytes than requested (or the encoder + // failed to commit a complete block), which breaks the lockstep + // invariant for every subsequent event + ERR_LOCKSTEP_DESYNC = (1 << 22), // i2s_event_queue_ and write_records_queue_ fell out of sync ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits }; @@ -141,7 +145,9 @@ class I2SAudioSpeakerBase : public I2SAudioOut, public speaker::Speaker, public TaskHandle_t speaker_task_handle_{nullptr}; EventGroupHandle_t event_group_{nullptr}; + // Lockstepped queues, one entry per DMA buffer: ISR completion timestamps and task-written frame records QueueHandle_t i2s_event_queue_{nullptr}; + QueueHandle_t write_records_queue_{nullptr}; std::weak_ptr audio_ring_buffer_; diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.cpp b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp index a853f934bb..42a72346cc 100644 --- a/esphome/components/i2s_audio/speaker/spdif_encoder.cpp +++ b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp @@ 
-358,25 +358,15 @@ HOT esp_err_t SPDIFEncoder::write(const uint8_t *src, size_t size, TickType_t ti } esp_err_t SPDIFEncoder::flush_with_silence(TickType_t ticks_to_wait) { - // First, send any pending complete block from a previous failed send - if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { - esp_err_t err = this->send_block_(ticks_to_wait); - if (err != ESP_OK) { - return err; + // If a complete block is already pending (from a previous failed send), emit just that block. + // Otherwise pad the partial block with silence (or generate a full silence block if empty) + // and send. Always emits exactly one block on success. + if (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { + static const uint8_t SILENCE[2] = {0, 0}; + while (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { + this->encode_sample_(SILENCE); } } - - if (!this->has_pending_data()) { - return ESP_OK; // Nothing to flush - } - - // Encode silence (zeros) until the block is complete - static const uint8_t SILENCE[2] = {0, 0}; - - while (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { - this->encode_sample_(SILENCE); - } - return this->send_block_(ticks_to_wait); } diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.h b/esphome/components/i2s_audio/speaker/spdif_encoder.h index 8516643432..8c5e068841 100644 --- a/esphome/components/i2s_audio/speaker/spdif_encoder.h +++ b/esphome/components/i2s_audio/speaker/spdif_encoder.h @@ -85,9 +85,10 @@ class SPDIFEncoder { /// @brief Check if there is a partial block pending bool has_pending_data() const { return this->spdif_block_ptr_ != this->spdif_block_buf_.get(); } - /// @brief Flush any pending partial block by padding with silence and sending + /// @brief Emit one complete SPDIF block: pad any pending partial block with silence and send, + /// or send a full silence block if nothing is pending. 
Always produces exactly one block on success. /// @param ticks_to_wait Timeout for blocking writes - /// @return esp_err_t as returned from the callback, or ESP_OK if nothing to flush + /// @return esp_err_t as returned from the callback esp_err_t flush_with_silence(TickType_t ticks_to_wait); /// @brief Reset the SPDIF block buffer and position tracking, discarding any partial block