diff --git a/esphome/components/i2s_audio/__init__.py b/esphome/components/i2s_audio/__init__.py index ffa63f5ee8..951b8c0498 100644 --- a/esphome/components/i2s_audio/__init__.py +++ b/esphome/components/i2s_audio/__init__.py @@ -201,7 +201,7 @@ async def register_i2s_audio_component(var, config): CONFIG_SCHEMA = cv.Schema( { cv.GenerateID(): cv.declare_id(I2SAudioComponent), - cv.Required(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number, + cv.Optional(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number, cv.Optional(CONF_I2S_BCLK_PIN): pins.internal_gpio_output_pin_number, cv.Optional(CONF_I2S_MCLK_PIN): pins.internal_gpio_output_pin_number, }, @@ -290,7 +290,8 @@ async def to_code(config): # Helps avoid callbacks being skipped due to processor load add_idf_sdkconfig_option("CONFIG_I2S_ISR_IRAM_SAFE", True) - cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN])) + if CONF_I2S_LRCLK_PIN in config: + cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN])) if CONF_I2S_BCLK_PIN in config: cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN])) if CONF_I2S_MCLK_PIN in config: diff --git a/esphome/components/i2s_audio/speaker/__init__.py b/esphome/components/i2s_audio/speaker/__init__.py index 99aa712c68..759cc40ca9 100644 --- a/esphome/components/i2s_audio/speaker/__init__.py +++ b/esphome/components/i2s_audio/speaker/__init__.py @@ -18,10 +18,12 @@ from .. 
CONF_DAC_TYPE = "dac_type"
CONF_I2S_COMM_FMT = "i2s_comm_fmt"
# Boolean option that switches the speaker platform to S/PDIF output.
CONF_SPDIF_MODE = "spdif_mode"

# Codegen handle for the C++ S/PDIF speaker class. I2SAudioSpeakerBase and
# I2SAudioSpeaker are already declared earlier in this module, so they are
# deliberately not declared a second time here.
I2SAudioSpeakerSPDIF = i2s_audio_ns.class_("I2SAudioSpeakerSPDIF", I2SAudioSpeakerBase)

# Declared exactly once; a repeated assignment would only rebind the same name.
I2SCommFmt = i2s_audio_ns.enum("I2SCommFmt", is_class=True)
-163,6 +192,7 @@ CONFIG_SCHEMA = cv.All( _validate_esp32_variant, _set_num_channels_from_config, _set_stream_limits, + _select_speaker_class, validate_mclk_divisible_by_3, ) @@ -175,6 +205,28 @@ def _final_validate(config): if config[CONF_I2S_COMM_FMT] == "stand_max": raise cv.Invalid("I2S standard max format is no longer supported.") + if config.get(CONF_SPDIF_MODE, False): + # SPDIF mode specific validations + if config[CONF_SAMPLE_RATE] not in [44100, 48000]: + raise cv.Invalid( + "SPDIF mode only supports 44100 Hz or 48000 Hz sample rates" + ) + if config[CONF_CHANNEL] != CONF_STEREO: + raise cv.Invalid("SPDIF mode only supports stereo channel configuration") + # bits_per_sample is converted to float by the schema + if config[CONF_BITS_PER_SAMPLE] != 16: + raise cv.Invalid("SPDIF mode only supports 16 bits per sample") + if not config[CONF_USE_APLL]: + raise cv.Invalid( + "SPDIF mode requires 'use_apll: true' for accurate clock generation" + ) + if config[CONF_I2S_MODE] != CONF_PRIMARY: + raise cv.Invalid("SPDIF mode requires 'i2s_mode: primary'") + if config[CONF_I2S_COMM_FMT] != "stand_i2s": + raise cv.Invalid("SPDIF mode requires 'i2s_comm_fmt: stand_i2s'") + if config[CONF_MCLK_MULTIPLE] != 256: + raise cv.Invalid("SPDIF mode requires 'mclk_multiple: 256'") + FINAL_VALIDATE_SCHEMA = _final_validate @@ -186,12 +238,18 @@ async def to_code(config): await speaker.register_speaker(var, config) cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN])) - fmt = I2SCommFmt.STANDARD # equals stand_i2s, stand_pcm_long, i2s_msb, pcm_long - if config[CONF_I2S_COMM_FMT] in ["stand_msb", "i2s_lsb"]: - fmt = I2SCommFmt.MSB - elif config[CONF_I2S_COMM_FMT] in ["stand_pcm_short", "pcm_short", "pcm"]: - fmt = I2SCommFmt.PCM - cg.add(var.set_i2s_comm_fmt(fmt)) + + is_spdif = config.get(CONF_SPDIF_MODE, False) + if is_spdif: + cg.add_define("USE_I2S_AUDIO_SPDIF_MODE") + else: + fmt = I2SCommFmt.STANDARD # equals stand_i2s, stand_pcm_long, i2s_msb, pcm_long + if 
config[CONF_I2S_COMM_FMT] in ["stand_msb", "i2s_lsb"]: + fmt = I2SCommFmt.MSB + elif config[CONF_I2S_COMM_FMT] in ["stand_pcm_short", "pcm_short", "pcm"]: + fmt = I2SCommFmt.PCM + cg.add(var.set_i2s_comm_fmt(fmt)) + if config[CONF_TIMEOUT] != CONF_NEVER: cg.add(var.set_timeout(config[CONF_TIMEOUT])) cg.add(var.set_buffer_duration(config[CONF_BUFFER_DURATION])) diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp new file mode 100644 index 0000000000..e2146de63c --- /dev/null +++ b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp @@ -0,0 +1,629 @@ +#include "i2s_audio_spdif.h" + +#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE) + +#include + +#include "esphome/components/audio/audio.h" +#include "esphome/components/audio/audio_transfer_buffer.h" + +#include "esphome/core/hal.h" +#include "esphome/core/log.h" + +#include "esp_timer.h" + +namespace esphome::i2s_audio { + +static const char *const TAG = "i2s_audio.spdif"; + +// SPDIF mode adds overhead as each sample is encapsulated in a subframe; +// each DMA buffer can hold only 192 samples (~4ms each vs. ~15ms for standard I2S). +// To match the standard I2S buffering duration, we use more buffers to minimize +// the impact of the overhead, such as stuttering or audio/silence oscillation. +// 15 buffers x 4ms = 60ms of DMA buffering (same as 4 x 15ms for standard) +static constexpr size_t SPDIF_DMA_BUFFERS_COUNT = 15; + +// Timeout for flushing pending frames if no callback received. +static constexpr uint32_t SPDIF_FLUSH_TIMEOUT_MS = 20; + +// Number of DMA events between upstream callbacks (~16ms = 4 events x 4ms each). +// Matches non-SPDIF timing to prevent overwhelming upstream sync algorithms. +static constexpr uint32_t SPDIF_DMA_EVENTS_PER_CALLBACK = 4; + +// Consider TX stalled only if no DMA callbacks have arrived for this long. 
+// Zero-block non-blocking writes alone are not sufficient (they can happen when DMA is simply full). +static constexpr uint32_t SPDIF_STALL_NO_DMA_MS = 80; + +// Fallback stall detector: force recovery if silence writes make no forward progress for too long, +// even if occasional DMA callbacks are still observed. +static constexpr uint32_t SPDIF_STALL_ZERO_PROGRESS_MS = 1000; + +// Minimum spacing between re-prime attempts to avoid churn. +static constexpr uint32_t SPDIF_REPRIME_COOLDOWN_MS = 500; + +// Small waits used in SPDIF mode to keep DMA fed during rapid pipeline churn. +static constexpr uint32_t SPDIF_EMPTY_READ_DELAY_MS = 1; +static constexpr uint32_t SPDIF_SILENCE_LOOP_DELAY_MS = 1; +static constexpr uint32_t SPDIF_PLAY_RETRY_WAIT_MS = 5; + +static constexpr size_t SPDIF_I2S_EVENT_QUEUE_COUNT = SPDIF_DMA_BUFFERS_COUNT + 1; + +// Static silence buffer for SPDIF continuous mode +// 192 samples * 2 channels * 2 bytes per sample = 768 bytes +// Stored in flash (.rodata section) to avoid stack/heap usage +static const int16_t SPDIF_SILENCE_BUFFER[SPDIF_BLOCK_SAMPLES * 2] = {0}; + +// Static callback functions for SPDIF encoder (avoids std::function overhead) +static esp_err_t spdif_preload_cb(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait) { + auto *speaker = static_cast(user_ctx); + size_t bytes_written = 0; + esp_err_t err = i2s_channel_preload_data(speaker->get_tx_handle(), data, size, &bytes_written); + if (err != ESP_OK || bytes_written != size) { + ESP_LOGW(TAG, "Preload failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size); + return (err != ESP_OK) ? 
err : ESP_ERR_NO_MEM; + } + return ESP_OK; +} + +static esp_err_t spdif_write_cb(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait) { + auto *speaker = static_cast(user_ctx); + size_t bytes_written = 0; + esp_err_t err = i2s_channel_write(speaker->get_tx_handle(), data, size, &bytes_written, ticks_to_wait); + // ESP_ERR_TIMEOUT is expected under DMA backpressure in SPDIF mode. + if (err != ESP_OK && err != ESP_ERR_TIMEOUT) { + ESP_LOGW(TAG, "I2S write failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size); + } + return err; +} + +void I2SAudioSpeakerSPDIF::setup() { + I2SAudioSpeakerBase::setup(); + if (this->is_failed()) { + return; + } + + this->spdif_encoder_ = new SPDIFEncoder(); + if (!this->spdif_encoder_->setup()) { + ESP_LOGE(TAG, "Encoder setup failed"); + this->mark_failed(); + return; + } + + // Configure channel status block with the sample rate + this->spdif_encoder_->set_sample_rate(this->sample_rate_); + + // Separate callbacks for preload (during underflow recovery) and normal writes + this->spdif_encoder_->set_preload_callback(spdif_preload_cb, this); + this->spdif_encoder_->set_write_callback(spdif_write_cb, this); +} + +void I2SAudioSpeakerSPDIF::dump_config() { + I2SAudioSpeakerBase::dump_config(); + ESP_LOGCONFIG(TAG, + " SPDIF Mode: YES\n" + " Sample Rate: %" PRIu32 " Hz", + this->sample_rate_); +} + +void I2SAudioSpeakerSPDIF::on_task_stopped() { this->spdif_silence_start_ = 0; } + +size_t I2SAudioSpeakerSPDIF::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) { + if (this->is_failed()) { + ESP_LOGE(TAG, "Setup failed; cannot play audio"); + return 0; + } + + // In SPDIF mode, keep accepting upstream audio while the speaker task is active. + // This avoids transient drops during stop/start transitions. 
+ const bool task_active = (this->speaker_task_handle_ != nullptr); + + if (this->state_ != speaker::STATE_RUNNING && this->state_ != speaker::STATE_STARTING) { + this->start(); + } + + if (!task_active && this->state_ != speaker::STATE_RUNNING) { + // Unable to write data to a running speaker, so delay the max amount of time so it can get ready + vTaskDelay(ticks_to_wait); + ticks_to_wait = 0; + } + + size_t bytes_written = 0; + if (this->state_ == speaker::STATE_RUNNING || task_active) { + std::shared_ptr temp_ring_buffer = this->audio_ring_buffer_.lock(); + if (temp_ring_buffer != nullptr) { + // In SPDIF mode, a tiny wait helps avoid transient 0-byte writes during short backpressure windows. + TickType_t effective_ticks_to_wait = ticks_to_wait; + if (effective_ticks_to_wait == 0) { + effective_ticks_to_wait = pdMS_TO_TICKS(1); + } + bytes_written = temp_ring_buffer->write_without_replacement((void *) data, length, effective_ticks_to_wait); + if (bytes_written == 0 && length > 0) { + // Retry once to catch short free-space windows during rapid seek/track transitions. 
+ bytes_written = + temp_ring_buffer->write_without_replacement((void *) data, length, pdMS_TO_TICKS(SPDIF_PLAY_RETRY_WAIT_MS)); + } + } + } + + return bytes_written; +} + +void I2SAudioSpeakerSPDIF::run_speaker_task() { + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_STARTING); + + // Reset SPDIF encoder at task start to ensure clean state + // (previous task may have left stale data in encoder buffer) + if (this->spdif_encoder_ != nullptr) { + this->spdif_encoder_->reset(); + } + + const uint32_t dma_buffers_duration_ms = DMA_BUFFER_DURATION_MS * SPDIF_DMA_BUFFERS_COUNT; + // Ensure ring buffer duration is at least the duration of all DMA buffers + const uint32_t ring_buffer_duration = std::max(dma_buffers_duration_ms, this->buffer_duration_ms_); + + // The DMA buffers may have more bits per sample, so calculate buffer sizes based on the input audio stream info + const size_t ring_buffer_size = this->current_stream_info_.ms_to_bytes(ring_buffer_duration); + + // For SPDIF mode, one DMA buffer = one SPDIF block = 192 PCM frames + const uint32_t frames_to_fill_single_dma_buffer = SPDIF_BLOCK_SAMPLES; + const size_t bytes_to_fill_single_dma_buffer = + this->current_stream_info_.frames_to_bytes(frames_to_fill_single_dma_buffer); + + bool successful_setup = false; + std::unique_ptr transfer_buffer = + audio::AudioSourceTransferBuffer::create(bytes_to_fill_single_dma_buffer); + + if (transfer_buffer != nullptr) { + std::shared_ptr temp_ring_buffer = ring_buffer::RingBuffer::create(ring_buffer_size); + if (temp_ring_buffer.use_count() == 1) { + transfer_buffer->set_source(temp_ring_buffer); + this->audio_ring_buffer_ = temp_ring_buffer; + successful_setup = true; + } + } + + if (!successful_setup) { + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); + } else { + // Preload DMA buffers with SPDIF-encoded silence before enabling the channel. 
+ // This ensures the first data transmitted is valid SPDIF (not raw zeros from + // auto_clear) and prevents phantom DMA events before real audio is available. + // Track how many buffers were preloaded so the DMA event loop can skip + // frame accounting until the preloaded silence has fully drained. + uint32_t preload_buffers_remaining = 0; + this->spdif_encoder_->set_preload_mode(true); + for (size_t i = 0; i < SPDIF_DMA_BUFFERS_COUNT; i++) { + uint32_t preload_blocks = 0; + esp_err_t preload_err = this->spdif_encoder_->write(reinterpret_cast(SPDIF_SILENCE_BUFFER), + sizeof(SPDIF_SILENCE_BUFFER), + pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &preload_blocks); + if (preload_err != ESP_OK || preload_blocks == 0) { + break; // DMA buffers full or error + } + preload_buffers_remaining += preload_blocks; + } + this->spdif_encoder_->set_preload_mode(false); + this->spdif_encoder_->reset(); // Clean encoder state for the main loop + + // Now register the callback and enable the channel + xQueueReset(this->i2s_event_queue_); + const i2s_event_callbacks_t callbacks = {.on_sent = i2s_on_sent_cb}; + i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this); + i2s_channel_enable(this->tx_handle_); + + bool stop_gracefully = false; + bool tx_dma_underflow = true; + + uint32_t frames_written = 0; + + // SPDIF Continuous Silence Mode + Callback Decimation + // + // Key principles: + // 1. NEVER stop the I2S channel - always output a valid SPDIF stream + // 2. When no audio data, output silence-encoded SPDIF blocks (not zeros!) + // 3. Fire callbacks every 4 DMA events (~16ms), matching non-SPDIF timing + // + // This eliminates gaps that cause SPDIF receivers to re-sync, and reduces + // callback rate to prevent overwhelming upstream sync algorithms. 
+ const uint32_t spdif_callback_threshold = this->current_stream_info_.ms_to_frames(DMA_BUFFER_DURATION_MS); + uint32_t spdif_pending_frames = 0; + int64_t spdif_pending_timestamp = 0; + uint32_t spdif_last_callback_time = millis(); + // Count DMA events for decimation + uint32_t spdif_dma_event_count = 0; + uint32_t spdif_last_dma_event_time = millis(); + // Detect a stalled DMA path (many silence write attempts with zero accepted blocks). + uint32_t spdif_zero_block_streak = 0; + uint32_t spdif_last_block_progress_time = millis(); + uint32_t spdif_last_reprime_time = 0; + + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_RUNNING); + + // SPDIF continuous mode: loop runs indefinitely, outputting silence when no audio data + // to keep the receiver synced. Exits only via break (stream info change or silence timeout). + while (true) { + uint32_t event_group_bits = xEventGroupGetBits(this->event_group_); + + if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) { + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP); + // In SPDIF continuous mode, don't tear down or expose STOPPED here. + // Keep the task alive and transition to silence output. + this->spdif_silence_start_ = millis(); + ESP_LOGV(TAG, "COMMAND_STOP received, continuing in silence mode"); + } + if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) { + xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY); + stop_gracefully = true; + } + + if (this->audio_stream_info_ != this->current_stream_info_) { + // Audio stream info changed, stop the speaker task so it will restart with the proper settings. + ESP_LOGV(TAG, "Exiting: stream info changed"); + break; + } + + int64_t write_timestamp; + while (xQueueReceive(this->i2s_event_queue_, &write_timestamp, 0)) { + spdif_last_dma_event_time = millis(); + + // Skip frame accounting for preloaded silence buffers still draining. 
+ // These DMA events correspond to silence that was preloaded before the + // channel was enabled, not real audio written by the task. + if (preload_buffers_remaining > 0) { + preload_buffers_remaining--; + continue; + } + + // Receives timing events from the I2S on_sent callback. If actual audio data was sent in this event, it passes + // on the timing info via the audio_output_callback. + uint32_t frames_sent = frames_to_fill_single_dma_buffer; + if (frames_to_fill_single_dma_buffer > frames_written) { + tx_dma_underflow = true; + frames_sent = frames_written; + const uint32_t frames_zeroed = frames_to_fill_single_dma_buffer - frames_written; + write_timestamp -= this->current_stream_info_.frames_to_microseconds(frames_zeroed); + } else { + tx_dma_underflow = false; + } + frames_written -= frames_sent; + + // SPDIF Callback Decimation: fire every 4th DMA event (~16ms) + // This matches non-SPDIF timing and prevents overwhelming upstream. + if (spdif_callback_threshold > 0) { + spdif_dma_event_count++; + + // Accumulate frames; always keep the latest timestamp so the + // callback reports when the last sample left the wire, not the first. 
+ if (frames_sent > 0) { + spdif_pending_timestamp = write_timestamp; + spdif_pending_frames += frames_sent; + } + + // Fire callback every 4 DMA events, or on timeout if we have pending frames + bool decimation_reached = (spdif_dma_event_count >= SPDIF_DMA_EVENTS_PER_CALLBACK); + bool timeout_flush = + (spdif_pending_frames > 0) && ((millis() - spdif_last_callback_time) >= SPDIF_FLUSH_TIMEOUT_MS); + + if (decimation_reached || timeout_flush) { + if (spdif_pending_frames > 0) { + this->audio_output_callback_(spdif_pending_frames, spdif_pending_timestamp); + spdif_pending_frames = 0; + spdif_last_callback_time = millis(); + } + spdif_dma_event_count = 0; // Reset decimation counter + } + } + } + + if (this->pause_state_) { + // Pause state is accessed atomically, so thread safe + // Delay so the task yields, then skip transferring audio data + vTaskDelay(pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS)); + continue; + } + + // Wait half the duration of the data already written to the DMA buffers for new audio data + // The millisecond helper modifies the frames_written variable, so use the microsecond helper and divide by 1000 + uint32_t read_delay = (this->current_stream_info_.frames_to_microseconds(frames_written) / 1000) / 2; + + // In SPDIF mode, if transfer buffer is empty (we're pumping silence), use a very short timeout. + // This ensures we can pump silence fast enough to keep the DMA fed (~250 blocks/sec needed). + // Otherwise the long timeout based on frames_written causes DMA to run dry. 
+ if (transfer_buffer->available() == 0) { + read_delay = SPDIF_EMPTY_READ_DELAY_MS; + } + + size_t bytes_read = transfer_buffer->transfer_data_from_source(pdMS_TO_TICKS(read_delay)); + uint8_t *new_data = transfer_buffer->get_buffer_end() - bytes_read; + + if (bytes_read > 0) { + this->apply_software_volume_(new_data, bytes_read); + this->swap_esp32_mono_samples_(new_data, bytes_read); + } + + if (transfer_buffer->available() == 0) { + // SPDIF Continuous Silence Mode: always output valid SPDIF stream + // When no audio data, write silence-encoded blocks to keep receiver happy + if (this->spdif_encoder_ != nullptr) { + // "Graceful stop" means "drain buffered audio, then stop." In SPDIF + // continuous mode we never actually stop, so once audio is drained + // (we're here), reset the flag to re-enable silence writing and stall + // recovery. Without this, stop_gracefully stays true forever and + // blocks silence output, causing DMA to degrade on auto_clear zeros. + stop_gracefully = false; + + // Track when we entered silence mode + if (this->spdif_silence_start_ == 0) { + this->spdif_silence_start_ = millis(); + } + + // If silence persists past the configured timeout, stop the task + // so components expecting timeout semantics can recover. + if (this->timeout_.has_value()) { + const uint32_t silence_duration = millis() - this->spdif_silence_start_; + if (silence_duration >= this->timeout_.value()) { + ESP_LOGV(TAG, "Silence timeout reached (%" PRIu32 "ms) - stopping speaker", silence_duration); + break; + } + } + + // First flush any partial block with silence padding (non-blocking to avoid getting stuck). + // IMPORTANT: Credit any partial block frames to frames_written so the audio_output_callback_ + // fires for them. Without this, pending_playback_frames_ in the mixer's SourceSpeaker never + // reaches 0 when a stream ends on a non-192-frame boundary, permanently blocking teardown. 
+ if (this->spdif_encoder_->has_pending_data()) { + uint32_t partial_frames = this->spdif_encoder_->get_pending_frames(); + // Use a tiny timeout to allow DMA queue progress without stalling the task. + esp_err_t flush_err = this->spdif_encoder_->flush_with_silence(pdMS_TO_TICKS(1)); + if (flush_err == ESP_OK && partial_frames > 0) { + frames_written += partial_frames; + } + } + + // CRITICAL: In SPDIF continuous mode, ALWAYS write silence when no audio data. + // We don't check tx_dma_underflow because: + // 1. When DMA runs empty, callbacks stop, so tx_dma_underflow doesn't update + // 2. The non-blocking write handles "DMA full" gracefully (just doesn't write) + // 3. We need continuous output to prevent receiver from losing sync + if (!stop_gracefully) { + uint32_t silence_blocks = 0; + esp_err_t write_err = this->spdif_encoder_->write( + reinterpret_cast(SPDIF_SILENCE_BUFFER), sizeof(SPDIF_SILENCE_BUFFER), pdMS_TO_TICKS(1), + &silence_blocks); // Non-blocking + // Don't count silence as frames_written - it's not real audio + + // Recovery path for a stalled SPDIF TX channel: + // if silence writes repeatedly produce zero blocks AND DMA callbacks have stopped, + // re-prime DMA using preload mode. 
+ const uint32_t ms_since_dma = millis() - spdif_last_dma_event_time; + const bool dma_events_stalled = ms_since_dma >= SPDIF_STALL_NO_DMA_MS; + if (silence_blocks > 0) { + spdif_last_block_progress_time = millis(); + } + const bool long_zero_progress = (millis() - spdif_last_block_progress_time) >= SPDIF_STALL_ZERO_PROGRESS_MS; + if (dma_events_stalled && silence_blocks == 0 && (write_err == ESP_OK || write_err == ESP_ERR_TIMEOUT)) { + spdif_zero_block_streak++; + } else { + spdif_zero_block_streak = 0; + } + + const uint32_t now_ms = millis(); + const bool reprime_cooldown_elapsed = + (spdif_last_reprime_time == 0) || ((now_ms - spdif_last_reprime_time) >= SPDIF_REPRIME_COOLDOWN_MS); + + if ((spdif_zero_block_streak >= 100 || long_zero_progress) && reprime_cooldown_elapsed) { + ESP_LOGV(TAG, "TX appears stalled, attempting DMA re-prime"); + + i2s_channel_disable(this->tx_handle_); + + const i2s_event_callbacks_t null_callbacks = {.on_sent = nullptr}; + i2s_channel_register_event_callback(this->tx_handle_, &null_callbacks, this); + + this->spdif_encoder_->set_preload_mode(true); + uint32_t preload_blocks = 0; + esp_err_t preload_err = this->spdif_encoder_->write( + reinterpret_cast(SPDIF_SILENCE_BUFFER), sizeof(SPDIF_SILENCE_BUFFER), + pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &preload_blocks); + this->spdif_encoder_->set_preload_mode(false); + + xQueueReset(this->i2s_event_queue_); + const i2s_event_callbacks_t callbacks = {.on_sent = i2s_on_sent_cb}; + i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this); + i2s_channel_enable(this->tx_handle_); + + if (preload_err == ESP_OK && preload_blocks > 0) { + tx_dma_underflow = false; + preload_buffers_remaining = preload_blocks; + frames_written = 0; // Stale after channel disable/enable cycle + ESP_LOGV(TAG, "DMA re-prime successful (%" PRIu32 " preload blocks)", preload_blocks); + spdif_last_block_progress_time = now_ms; + } else { + ESP_LOGW(TAG, "DMA re-prime failed (%s, blocks=%" PRIu32 ")", 
esp_err_to_name(preload_err), + preload_blocks); + } + spdif_last_reprime_time = now_ms; + spdif_zero_block_streak = 0; + } + } + } + + if (stop_gracefully && tx_dma_underflow) { + // In SPDIF continuous mode, don't break on graceful stop during silence + // Keep outputting silence until new audio arrives or explicit COMMAND_STOP + // (handled above which transitions to silence mode rather than breaking) + } + + // In SPDIF mode, use a shorter delay to pump silence faster + // We need ~250 blocks/sec to keep DMA fed, so max 4ms per iteration + vTaskDelay(pdMS_TO_TICKS(SPDIF_SILENCE_LOOP_DELAY_MS)); + } else { + // Have audio data to write + size_t bytes_written = 0; + + // Clear silence timer since we have audio data now + if (this->spdif_silence_start_ != 0) { + uint32_t silence_duration = millis() - this->spdif_silence_start_; + if (silence_duration > 100) { + ESP_LOGV(TAG, "Exiting silence mode after %" PRIu32 "ms, have audio data", silence_duration); + } + this->spdif_silence_start_ = 0; + } + + { + uint32_t blocks_sent = 0; + size_t pcm_bytes_consumed = 0; + + // Write audio data to encoder (which writes to DMA) + esp_err_t err = + this->spdif_encoder_->write(transfer_buffer->get_buffer_start(), transfer_buffer->available(), + pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &blocks_sent, &pcm_bytes_consumed); + if (err != ESP_OK && err != ESP_ERR_TIMEOUT) { + ESP_LOGW(TAG, "Write failed: %s", esp_err_to_name(err)); + } + + // Only consume source bytes that were actually accepted by the encoder. + bytes_written = pcm_bytes_consumed; + + // Update frame accounting based on complete blocks sent (192 frames per block) + if (bytes_written > 0) { + frames_written += blocks_sent * SPDIF_BLOCK_SAMPLES; + transfer_buffer->decrease_buffer_length(bytes_written); + // Audio blocks count as DMA progress for the stall detector. 
+ // Without this, a long uninterrupted audio stream makes the + // progress timer stale, triggering a spurious re-prime the + // instant we transition to silence. + spdif_last_block_progress_time = millis(); + } + } + } + } + // If we reach here, the while loop exited - either via break or condition became false + // In SPDIF mode, loop exit is expected when: + // 1. Timeout reached (user configured timeout) + // 2. Stream info changed + // Only warn if timeout is "never" since that should never exit + if (!this->timeout_.has_value()) { + ESP_LOGW(TAG, "Unexpected loop exit; set 'timeout: never' to prevent this"); + } + } + + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_STOPPING); + + // Reset SPDIF encoder state to prevent stale state on next start + if (this->spdif_encoder_ != nullptr) { + this->spdif_encoder_->set_preload_mode(false); + this->spdif_encoder_->reset(); + } + + if (transfer_buffer != nullptr) { + transfer_buffer.reset(); + } + + xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_STOPPED); + + while (true) { + // Continuously delay until the loop method deletes the task + vTaskDelay(pdMS_TO_TICKS(10)); + } +} + +esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_stream_info) { + this->current_stream_info_ = audio_stream_info; + + // SPDIF mode validation + if (this->sample_rate_ != audio_stream_info.get_sample_rate()) { + ESP_LOGE(TAG, "Only supports a single sample rate (configured: %" PRIu32 " Hz, stream: %" PRIu32 " Hz)", + this->sample_rate_, audio_stream_info.get_sample_rate()); + return ESP_ERR_NOT_SUPPORTED; + } + if (audio_stream_info.get_bits_per_sample() != 16) { + ESP_LOGE(TAG, "Only supports 16 bits per sample"); + return ESP_ERR_NOT_SUPPORTED; + } + if (audio_stream_info.get_channels() != 2) { + ESP_LOGE(TAG, "Only supports stereo (2 channels)"); + return ESP_ERR_NOT_SUPPORTED; + } + + if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO && + (i2s_slot_bit_width_t) 
audio_stream_info.get_bits_per_sample() > this->slot_bit_width_) { + ESP_LOGE(TAG, "Stream bits per sample must be less than or equal to the speaker's configuration"); + return ESP_ERR_NOT_SUPPORTED; + } + + if (!this->parent_->try_lock()) { + ESP_LOGE(TAG, "Parent bus is busy"); + return ESP_ERR_INVALID_STATE; + } + + i2s_clock_src_t clk_src = I2S_CLK_SRC_DEFAULT; + +#if SOC_CLK_APLL_SUPPORTED + if (this->use_apll_) { + clk_src = i2s_clock_src_t::I2S_CLK_SRC_APLL; + } +#endif // SOC_CLK_APLL_SUPPORTED + + // SPDIF mode: fixed configuration for BMC encoding + // For new driver, dma_frame_num is in I2S frames (8 bytes each for 32-bit stereo) + uint32_t dma_buffer_length = SPDIF_BLOCK_I2S_FRAMES; // One SPDIF block = 384 I2S frames = 3072 bytes + + // Log DMA configuration for debugging + ESP_LOGV(TAG, "I2S DMA config: %zu buffers x %lu frames = %lu bytes total", (size_t) SPDIF_DMA_BUFFERS_COUNT, + (unsigned long) dma_buffer_length, + (unsigned long) (SPDIF_DMA_BUFFERS_COUNT * dma_buffer_length * 8)); // 8 bytes per frame for 32-bit stereo + + i2s_chan_config_t chan_cfg = { + .id = this->parent_->get_port(), + .role = this->i2s_role_, + .dma_desc_num = SPDIF_DMA_BUFFERS_COUNT, + .dma_frame_num = dma_buffer_length, + .auto_clear = true, + .intr_priority = 3, + }; + + // SPDIF: double sample rate for BMC, 32-bit stereo, only data pin needed + i2s_std_clk_config_t clk_cfg = { + .sample_rate_hz = this->sample_rate_ * 2, + .clk_src = clk_src, + .mclk_multiple = this->mclk_multiple_, + }; + + i2s_std_slot_config_t slot_cfg = I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_32BIT, I2S_SLOT_MODE_STEREO); + + i2s_std_gpio_config_t gpio_cfg = { + .mclk = GPIO_NUM_NC, + .bclk = GPIO_NUM_NC, + .ws = GPIO_NUM_NC, + .dout = this->dout_pin_, + .din = GPIO_NUM_NC, + .invert_flags = + { + .mclk_inv = false, + .bclk_inv = false, + .ws_inv = false, + }, + }; + + i2s_std_config_t std_cfg = { + .clk_cfg = clk_cfg, + .slot_cfg = slot_cfg, + .gpio_cfg = gpio_cfg, + }; + + esp_err_t 
err = this->init_i2s_channel_(chan_cfg, std_cfg, SPDIF_I2S_EVENT_QUEUE_COUNT); + if (err != ESP_OK) { + return err; + } + + // Channel is NOT enabled here. The speaker task will preload DMA buffers + // with SPDIF-encoded silence before enabling, ensuring the first data on + // the wire is valid SPDIF (not raw zeros from auto_clear) and preventing + // phantom DMA events before real audio data is available. + + return ESP_OK; +} + +} // namespace esphome::i2s_audio + +#endif // USE_ESP32 && USE_I2S_AUDIO_SPDIF_MODE diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.h b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.h new file mode 100644 index 0000000000..ca7774123b --- /dev/null +++ b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.h @@ -0,0 +1,34 @@ +#pragma once + +#include "esphome/core/defines.h" + +#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE) + +#include "i2s_audio_speaker.h" +#include "spdif_encoder.h" + +namespace esphome::i2s_audio { + +/// @brief SPDIF speaker implementation. +/// Encodes PCM audio into IEC 60958-3 S/PDIF bitstream using BMC encoding, +/// outputting through a single I2S data pin. Maintains continuous output +/// (silence when no audio) to keep SPDIF receivers synchronized. 
+class I2SAudioSpeakerSPDIF : public I2SAudioSpeakerBase { + public: + void setup() override; + void dump_config() override; + + size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override; + + protected: + void run_speaker_task() override; + esp_err_t start_i2s_driver(audio::AudioStreamInfo &audio_stream_info) override; + void on_task_stopped() override; + + SPDIFEncoder *spdif_encoder_{nullptr}; + uint32_t spdif_silence_start_{0}; // Timestamp when silence mode started (0 = not in silence) +}; + +} // namespace esphome::i2s_audio + +#endif // USE_ESP32 && USE_I2S_AUDIO_SPDIF_MODE diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp index a71b7db3ba..f34839a314 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp @@ -257,7 +257,7 @@ esp_err_t I2SAudioSpeakerBase::init_i2s_channel_(const i2s_chan_config_t &chan_c err = i2s_channel_init_std_mode(this->tx_handle_, &std_cfg); if (err != ESP_OK) { - ESP_LOGE(TAG, "Failed to initialize channel"); + ESP_LOGE(TAG, "Failed to initialize I2S channel"); i2s_del_channel(this->tx_handle_); this->tx_handle_ = nullptr; this->parent_->unlock(); diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h index c598ca1bf8..bfde455c75 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h @@ -18,7 +18,7 @@ namespace esphome::i2s_audio { -// Shared constants for I2S audio speaker implementations +// Shared constants used by both standard and SPDIF speaker implementations static constexpr uint32_t DMA_BUFFER_DURATION_MS = 15; static constexpr size_t TASK_STACK_SIZE = 4096; static constexpr ssize_t TASK_PRIORITY = 19; @@ -42,7 +42,7 @@ enum SpeakerEventGroupBits : uint32_t { /// @brief Abstract base class 
for I2S audio speaker implementations. /// Provides shared infrastructure (event groups, ring buffer, volume control, task lifecycle) -/// for derived I2S speaker classes. +/// for derived standard I2S and SPDIF speaker classes. class I2SAudioSpeakerBase : public I2SAudioOut, public speaker::Speaker, public Component { public: float get_setup_priority() const override { return esphome::setup_priority::PROCESSOR; } diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.cpp b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp new file mode 100644 index 0000000000..a853f934bb --- /dev/null +++ b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp @@ -0,0 +1,385 @@ +#include "spdif_encoder.h" + +#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE) + +#include "esphome/core/log.h" + +namespace esphome::i2s_audio { + +static const char *const TAG = "i2s_audio.spdif_encoder"; + +// S/PDIF preamble patterns (8 BMC bits each) +// These are the BMC-encoded sync patterns that violate normal BMC rules for easy detection. +// All preambles end at phase HIGH (last bit = 1), enabling consistent data encoding. +// Preamble is placed at bits 24-31 of word[0] for MSB-first transmission. +static constexpr uint8_t PREAMBLE_B = 0x17; // Block start (left channel, frame 0) +static constexpr uint8_t PREAMBLE_M = 0x1d; // Left channel (not block start) +static constexpr uint8_t PREAMBLE_W = 0x1b; // Right channel + +// BMC encoding of 4 zero bits starting at phase HIGH: 00_11_00_11 = 0x33 +// Since both aux nibbles (bits 4-7, 8-11) are zero for 16-bit audio and phase is preserved, both are 0x33. +static constexpr uint32_t BMC_ZERO_NIBBLE = 0x33; + +// Constexpr BMC encoder for compile-time LUT generation. +// Encodes with start phase=true (HIGH). 
The complement property allows phase=false +// via XOR: bmc_encode(v, N, false) == bmc_encode(v, N, true) ^ mask +static constexpr uint16_t bmc_lut_encode(uint32_t data, uint8_t num_bits) { + uint16_t bmc = 0; + bool phase = true; + for (uint8_t i = 0; i < num_bits; i++) { + bool bit = (data >> i) & 1; + uint8_t bmc_pair = phase ? (bit ? 0b01 : 0b00) : (bit ? 0b10 : 0b11); + bmc |= static_cast(bmc_pair) << ((num_bits - 1 - i) * 2); + if (!bit) + phase = !phase; + } + return bmc; +} + +// 4-bit BMC lookup table: 16 entries (16 bytes in flash) +// Index: 4-bit data value (0-15), always phase=true start +static constexpr auto BMC_LUT_4 = [] { + std::array t{}; + for (uint32_t i = 0; i < 16; i++) + t[i] = static_cast(bmc_lut_encode(i, 4)); + return t; +}(); + +// 8-bit BMC lookup table: 256 entries (512 bytes in flash) +// Index: 8-bit data value (0-255), always phase=true start +static constexpr auto BMC_LUT_8 = [] { + std::array t{}; + for (uint32_t i = 0; i < 256; i++) + t[i] = bmc_lut_encode(i, 8); + return t; +}(); + +// Initialize S/PDIF buffer +bool SPDIFEncoder::setup() { + this->spdif_block_buf_ = std::make_unique(SPDIF_BLOCK_SIZE_U32); + if (!this->spdif_block_buf_) { + ESP_LOGE(TAG, "Buffer allocation failed (%zu bytes)", SPDIF_BLOCK_SIZE_BYTES); + return false; + } + ESP_LOGV(TAG, "Buffer allocated (%zu bytes)", SPDIF_BLOCK_SIZE_BYTES); + + // Build initial channel status block with default sample rate + this->build_channel_status_(); + + this->reset(); + return true; +} + +void SPDIFEncoder::reset() { + this->spdif_block_ptr_ = this->spdif_block_buf_.get(); + this->frame_in_block_ = 0; + this->is_left_channel_ = true; +} + +void SPDIFEncoder::set_sample_rate(uint32_t sample_rate) { + if (this->sample_rate_ != sample_rate) { + this->sample_rate_ = sample_rate; + this->build_channel_status_(); + ESP_LOGD(TAG, "Sample rate set to %lu Hz", (unsigned long) sample_rate); + } +} + +void SPDIFEncoder::build_channel_status_() { + // IEC 60958-3 Consumer Channel 
Status Block (192 bits = 24 bytes) + // Transmitted LSB-first within each byte, one bit per frame via C bit + // + // Byte 0: Control bits + // Bit 0: 0 = Consumer format (not professional AES3) + // Bit 1: 0 = PCM audio (not non-audio data like AC3) + // Bit 2: 0 = No copyright assertion + // Bits 3-5: 000 = No pre-emphasis + // Bits 6-7: 00 = Mode 0 (basic consumer format) + // + // Byte 1: Category code (0x00 = general, 0x01 = CD, etc.) + // + // Byte 2: Source/channel numbers + // Bits 0-3: Source number (0 = unspecified) + // Bits 4-7: Channel number (0 = unspecified) + // + // Byte 3: Sample frequency and clock accuracy + // Bits 0-3: Sample frequency code + // Bits 4-5: Clock accuracy (00 = Level II, ±1000 ppm, appropriate for ESP32) + // Bits 6-7: Reserved (0) + // + // Bytes 4-23: Reserved (zeros for basic compliance) + + // Clear all bytes first + this->channel_status_.fill(0); + + // Byte 0: Consumer, PCM audio, no copyright, no pre-emphasis, Mode 0 + // All bits are 0, which is already set + + // Byte 1: Category code = 0x00 (general) + // Already 0 + + // Byte 2: Source/channel unspecified + // Already 0 + + // Byte 3: Sample frequency code (bits 0-3) + clock accuracy (bits 4-5) + // Clock accuracy = 00 (Level II, ±1000 ppm) - appropriate for ESP32 + uint8_t freq_code; + switch (this->sample_rate_) { + case 44100: + freq_code = 0x0; // 0000 + break; + case 48000: + freq_code = 0x2; // 0010 + break; + default: + // Other values are possible but they're not supported by ESPHome + freq_code = 0x1; // 0001 = not indicated + ESP_LOGW(TAG, "Unsupported sample rate %lu Hz, channel status will indicate 'not specified'", + (unsigned long) this->sample_rate_); + break; + } + // Byte 3: freq_code in bits 0-3, clock accuracy (00) in bits 4-5 + this->channel_status_[3] = freq_code; // Clock accuracy bits 4-5 are already 0 + + // Bytes 4-23 remain zero (word length not specified, no original sample freq, etc.) 
+} + +HOT void SPDIFEncoder::encode_sample_(const uint8_t *pcm_sample) { + // ============================================================================ + // Build raw 32-bit subframe (IEC 60958 format) + // ============================================================================ + // Bit layout: + // Bits 0-3: Preamble (handled separately, not in raw_subframe) + // Bits 4-7: Auxiliary audio data (zeros for 16-bit audio) + // Bits 8-11: Audio LSB extension (zeros for 16-bit audio) + // Bits 12-27: 16-bit audio sample (MSB-aligned in 20-bit audio field) + // Bit 28: V (Validity) - 0 = valid audio + // Bit 29: U (User data) - 0 + // Bit 30: C (Channel status) - from channel status block + // Bit 31: P (Parity) - even parity over bits 4-31 + // ============================================================================ + + // Place 16-bit audio sample at bits 12-27 (little-endian input: [0]=LSB, [1]=MSB) + uint32_t raw_subframe = (static_cast(pcm_sample[1]) << 20) | (static_cast(pcm_sample[0]) << 12); + + // V = 0 (valid audio), U = 0 (no user data) + // C = channel status bit for current frame (same bit used for both L and R subframes) + bool c_bit = this->get_channel_status_bit_(this->frame_in_block_); + if (c_bit) { + raw_subframe |= (1U << 30); + } + + // Calculate even parity over bits 4-30 + // This ensures consistent BMC ending phase regardless of audio content + uint32_t bits_4_30 = (raw_subframe >> 4) & 0x07FFFFFF; // 27 bits (4-30) + uint32_t ones_count = __builtin_popcount(bits_4_30); + uint32_t parity = ones_count & 1; // 1 if odd count, 0 if even + raw_subframe |= parity << 31; // Set P bit to make total even + + // ============================================================================ + // Select preamble based on position in block and channel + // ============================================================================ + // B = block start (left channel, frame 0 of 192-frame block) + // M = left channel (frames 1-191) + // W = right 
channel (all frames) + uint8_t preamble; + if (this->is_left_channel_) { + preamble = (this->frame_in_block_ == 0) ? PREAMBLE_B : PREAMBLE_M; + } else { + preamble = PREAMBLE_W; + } + + // ============================================================================ + // BMC encode the data portion (bits 4-31) using lookup tables + // ============================================================================ + // The I2S uses 16-bit halfword swap: bits 16-31 transmit before bits 0-15. + // This applies to BOTH word[0] and word[1]. + // + // word[0] transmission order: [16-23] → [24-31] → [0-7] → [8-15] + // For correct S/PDIF subframe order (preamble → aux → audio): + // - bits 16-23: preamble (8 BMC bits) + // - bits 24-31: BMC(subframe bits 4-7) - first aux nibble + // - bits 0-7: BMC(subframe bits 8-11) - second aux nibble + // - bits 8-15: BMC(subframe bits 12-15) - audio low nibble + // + // word[1] transmission order: [16-31] → [0-15] + // For correct S/PDIF subframe order: + // - bits 16-31: BMC(subframe bits 16-23) - audio mid byte + // - bits 0-15: BMC(subframe bits 24-31) - audio high nibble + VUCP + // ============================================================================ + + // All preambles end at phase HIGH. Bits 4-11 are always zero for 16-bit audio; + // two zero nibbles flip phase 8 times total → back to HIGH. + // So bits 12-15 always start encoding at phase=true. + + // Bits 12-15: 4-bit LUT lookup (always phase=true start) + uint32_t nibble = (raw_subframe >> 12) & 0xF; + uint32_t bmc_12_15 = BMC_LUT_4[nibble]; + + // Phase tracking via branchless XOR mask: + // - 0x0000 means phase=true (use LUT value directly) + // - 0xFFFF means phase=false (complement LUT value) + // End phase = start XOR (popcount & 1) since zero-bits flip phase, + // and for even bit widths: #zeros parity == popcount parity. 
+ uint32_t phase_mask = -(__builtin_popcount(nibble) & 1u) & 0xFFFF; + + // Bits 16-23: 8-bit LUT lookup with phase correction + uint32_t byte_mid = (raw_subframe >> 16) & 0xFF; + uint32_t bmc_16_23 = BMC_LUT_8[byte_mid] ^ phase_mask; + phase_mask ^= -(__builtin_popcount(byte_mid) & 1u) & 0xFFFF; + + // Bits 24-31: 8-bit LUT lookup with phase correction + uint32_t byte_hi = (raw_subframe >> 24) & 0xFF; + uint32_t bmc_24_31 = BMC_LUT_8[byte_hi] ^ phase_mask; + + // ============================================================================ + // Combine with correct positioning for I2S transmission + // ============================================================================ + // I2S with halfword swap: transmits bits 16-31, then bits 0-15. + // Within each halfword, MSB (highest bit) is transmitted first. + // + // For upper halfword (bits 16-31): bit 31 → bit 16 + // For lower halfword (bits 0-15): bit 15 → bit 0 + // + // Desired S/PDIF order: preamble → bmc_4_7 → bmc_8_11 → bmc_12_15 + // + // word[0] layout for correct transmission: + // bits 24-31: preamble (transmitted 1st, as MSB of upper halfword) + // bits 16-23: BMC_ZERO_NIBBLE (transmitted 2nd, aux bits 4-7) + // bits 8-15: BMC_ZERO_NIBBLE (transmitted 3rd, aux bits 8-11) + // bits 0-7: bmc_12_15 (transmitted 4th, audio low nibble) + // + // word[1] layout: + // bits 16-31: bmc_16_23 (transmitted 5th) + // bits 0-15: bmc_24_31 (transmitted 6th) + this->spdif_block_ptr_[0] = + bmc_12_15 | (BMC_ZERO_NIBBLE << 8) | (BMC_ZERO_NIBBLE << 16) | (static_cast(preamble) << 24); + this->spdif_block_ptr_[1] = bmc_24_31 | (bmc_16_23 << 16); + this->spdif_block_ptr_ += 2; + + // ============================================================================ + // Update position tracking + // ============================================================================ + if (!this->is_left_channel_) { + // Completed a stereo frame, advance frame counter + if (++this->frame_in_block_ >= SPDIF_BLOCK_SAMPLES) { + 
this->frame_in_block_ = 0; + } + } + this->is_left_channel_ = !this->is_left_channel_; +} + +esp_err_t SPDIFEncoder::send_block_(TickType_t ticks_to_wait) { + // Use the appropriate callback and context based on preload mode + SPDIFBlockCallback callback; + void *ctx; + + if (this->preload_mode_) { + callback = this->preload_callback_; + ctx = this->preload_callback_ctx_; + } else { + callback = this->write_callback_; + ctx = this->write_callback_ctx_; + } + + if (callback == nullptr) { + return ESP_ERR_INVALID_STATE; + } + + esp_err_t err = callback(ctx, this->spdif_block_buf_.get(), SPDIF_BLOCK_SIZE_BYTES, ticks_to_wait); + + if (err == ESP_OK) { + // Reset pointer for next block; position tracking continues from where it left off + this->spdif_block_ptr_ = this->spdif_block_buf_.get(); + } + + return err; +} + +size_t SPDIFEncoder::get_pending_pcm_bytes() const { + if (this->spdif_block_ptr_ == nullptr || this->spdif_block_buf_ == nullptr) { + return 0; + } + // Each PCM sample (2 bytes) produces 2 uint32_t values in the SPDIF buffer + // So pending uint32s / 2 = pending samples, and each sample is 2 bytes + size_t pending_uint32s = this->spdif_block_ptr_ - this->spdif_block_buf_.get(); + size_t pending_samples = pending_uint32s / 2; + return pending_samples * 2; // 2 bytes per sample +} + +HOT esp_err_t SPDIFEncoder::write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent, + size_t *bytes_consumed) { + const uint8_t *pcm_data = src; + const uint8_t *pcm_end = src + size; + uint32_t block_count = 0; + + while (pcm_data < pcm_end) { + // Check if there's a pending complete block from a previous failed send + if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { + esp_err_t err = this->send_block_(ticks_to_wait); + if (err != ESP_OK) { + if (blocks_sent != nullptr) { + *blocks_sent = block_count; + } + if (bytes_consumed != nullptr) { + *bytes_consumed = pcm_data - src; + } + return err; + } + ++block_count; 
+ } + + // Encode one 16-bit sample + this->encode_sample_(pcm_data); + pcm_data += 2; + } + + // Send any complete block that was just finished + if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { + esp_err_t err = this->send_block_(ticks_to_wait); + if (err != ESP_OK) { + if (blocks_sent != nullptr) { + *blocks_sent = block_count; + } + if (bytes_consumed != nullptr) { + *bytes_consumed = pcm_data - src; + } + return err; + } + ++block_count; + } + + if (blocks_sent != nullptr) { + *blocks_sent = block_count; + } + if (bytes_consumed != nullptr) { + *bytes_consumed = size; + } + return ESP_OK; +} + +esp_err_t SPDIFEncoder::flush_with_silence(TickType_t ticks_to_wait) { + // First, send any pending complete block from a previous failed send + if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { + esp_err_t err = this->send_block_(ticks_to_wait); + if (err != ESP_OK) { + return err; + } + } + + if (!this->has_pending_data()) { + return ESP_OK; // Nothing to flush + } + + // Encode silence (zeros) until the block is complete + static const uint8_t SILENCE[2] = {0, 0}; + + while (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) { + this->encode_sample_(SILENCE); + } + + return this->send_block_(ticks_to_wait); +} + +} // namespace esphome::i2s_audio + +#endif // USE_I2S_AUDIO_SPDIF_MODE diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.h b/esphome/components/i2s_audio/speaker/spdif_encoder.h new file mode 100644 index 0000000000..8516643432 --- /dev/null +++ b/esphome/components/i2s_audio/speaker/spdif_encoder.h @@ -0,0 +1,146 @@ +#pragma once + +#include "esphome/core/defines.h" + +#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE) + +#include +#include +#include +#include +#include "esp_err.h" +#include "esphome/core/helpers.h" + +namespace esphome::i2s_audio { + +// A SPDIF sample is 64-bits +static constexpr uint8_t SPDIF_BITS_PER_SAMPLE = 64; +// Number of samples 
in a SPDIF block +static constexpr uint16_t SPDIF_BLOCK_SAMPLES = 192; +// To emulate bi-phase mark code (BMC) (aka differential Manchester encoding) we send twice +// as many bits per sample so that we can generate the transitions this encoding requires. +static constexpr uint8_t EMULATED_BMC_BITS_PER_SAMPLE = SPDIF_BITS_PER_SAMPLE * 2; +static constexpr uint16_t SPDIF_BLOCK_SIZE_BYTES = SPDIF_BLOCK_SAMPLES * (EMULATED_BMC_BITS_PER_SAMPLE / 8); +static constexpr uint32_t SPDIF_BLOCK_SIZE_U32 = SPDIF_BLOCK_SIZE_BYTES / sizeof(uint32_t); // 3072 bytes / 4 = 768 +// I2S frame count for one SPDIF block (for new driver where frame = 8 bytes for 32-bit stereo) +static constexpr uint32_t SPDIF_BLOCK_I2S_FRAMES = SPDIF_BLOCK_SIZE_BYTES / 8; // 3072 / 8 = 384 frames +// PCM bytes needed for one complete SPDIF block (192 stereo frames * 2 bytes per sample * 2 channels) +static constexpr uint16_t SPDIF_PCM_BYTES_PER_BLOCK = SPDIF_BLOCK_SAMPLES * 2 * 2; // = 768 bytes + +/// Callback signature for block completion (raw function pointer for minimal overhead) +/// @param user_ctx User context pointer passed during callback registration +/// @param data Pointer to SPDIF encoded block data +/// @param size Size of the block in bytes (always SPDIF_BLOCK_SIZE_BYTES) +/// @param ticks_to_wait FreeRTOS ticks to wait for write completion +/// @return ESP_OK on success, or an error code +using SPDIFBlockCallback = esp_err_t (*)(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait); + +class SPDIFEncoder { + public: + /// @brief Initialize the SPDIF working buffer + /// @return true if setup was successful, false if allocation failed + bool setup(); + + /// @brief Set callback for normal writes (used when channel is running) + /// @param callback Function pointer to call when a block is ready + /// @param user_ctx Context pointer passed to callback (typically 'this' pointer of speaker) + void set_write_callback(SPDIFBlockCallback callback, void *user_ctx) { + 
this->write_callback_ = callback; + this->write_callback_ctx_ = user_ctx; + } + + /// @brief Set callback for preload writes (used when preloading to DMA before enabling channel) + /// @param callback Function pointer to call when a block is ready for preload + /// @param user_ctx Context pointer passed to callback (typically 'this' pointer of speaker) + void set_preload_callback(SPDIFBlockCallback callback, void *user_ctx) { + this->preload_callback_ = callback; + this->preload_callback_ctx_ = user_ctx; + } + + /// @brief Enable or disable preload mode + /// When in preload mode, completed blocks use the preload callback instead of write callback + void set_preload_mode(bool preload) { this->preload_mode_ = preload; } + + /// @brief Check if currently in preload mode + bool is_preload_mode() const { return this->preload_mode_; } + + /// @brief Convert PCM audio data to SPDIF BMC encoded data + /// @param src Source PCM audio data (16-bit stereo) + /// @param size Size of source data in bytes + /// @param ticks_to_wait Timeout for blocking writes + /// @param blocks_sent Optional pointer to receive the number of complete SPDIF blocks sent + /// @param bytes_consumed Optional pointer to receive the number of PCM bytes consumed from src + /// @return esp_err_t as returned from the callback + esp_err_t write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent = nullptr, + size_t *bytes_consumed = nullptr); + + /// @brief Get the number of PCM bytes currently pending in the partial block buffer + /// @return Number of pending PCM bytes (0 to SPDIF_PCM_BYTES_PER_BLOCK - 1) + size_t get_pending_pcm_bytes() const; + + /// @brief Get the number of PCM frames currently pending in the partial block buffer + /// @return Number of pending PCM frames (0 to SPDIF_BLOCK_SAMPLES - 1) + uint32_t get_pending_frames() const { return this->get_pending_pcm_bytes() / 4; } + + /// @brief Check if there is a partial block pending + bool has_pending_data() 
const { return this->spdif_block_ptr_ != this->spdif_block_buf_.get(); } + + /// @brief Flush any pending partial block by padding with silence and sending + /// @param ticks_to_wait Timeout for blocking writes + /// @return esp_err_t as returned from the callback, or ESP_OK if nothing to flush + esp_err_t flush_with_silence(TickType_t ticks_to_wait); + + /// @brief Reset the SPDIF block buffer and position tracking, discarding any partial block + void reset(); + + /// @brief Set the sample rate for Channel Status Block encoding + /// @param sample_rate Sample rate in Hz (e.g., 44100, 48000, 96000) + /// Call this before writing audio data to ensure correct channel status. + void set_sample_rate(uint32_t sample_rate); + + /// @brief Get the currently configured sample rate + uint32_t get_sample_rate() const { return this->sample_rate_; } + + protected: + /// @brief Encode a single 16-bit PCM sample into the current block position + HOT void encode_sample_(const uint8_t *pcm_sample); + + /// @brief Send the completed block via the appropriate callback + esp_err_t send_block_(TickType_t ticks_to_wait); + + /// @brief Build the channel status block from current configuration + void build_channel_status_(); + + /// @brief Get the channel status bit for a specific frame + /// @param frame Frame number (0-191) + /// @return The C bit value for this frame + ESPHOME_ALWAYS_INLINE inline bool get_channel_status_bit_(uint8_t frame) const { + // Channel status is 192 bits transmitted over 192 frames + // Bit N is transmitted in frame N, LSB-first within each byte + return (this->channel_status_[frame >> 3] >> (frame & 7)) & 1; + } + + // Member ordering optimized to minimize padding (largest alignment first) + + // 4-byte aligned members (pointers and uint32_t) + SPDIFBlockCallback write_callback_{nullptr}; + SPDIFBlockCallback preload_callback_{nullptr}; + void *write_callback_ctx_{nullptr}; + void *preload_callback_ctx_{nullptr}; + std::unique_ptr spdif_block_buf_; // 
Working buffer for SPDIF block (heap allocated) + uint32_t *spdif_block_ptr_{nullptr}; // Current position in block buffer + uint32_t sample_rate_{48000}; // Sample rate for Channel Status Block encoding + + // 1-byte aligned members (grouped together to avoid internal padding) + uint8_t frame_in_block_{0}; // 0-191, tracks stereo frame position within block + bool is_left_channel_{true}; // Alternates L/R for stereo samples + bool preload_mode_{false}; // Whether to use preload callback vs write callback + + // Channel Status Block (192 bits = 24 bytes, transmitted over 192 frames) + // Placed last since std::array has 1-byte alignment + std::array channel_status_{}; +}; + +} // namespace esphome::i2s_audio + +#endif // USE_I2S_AUDIO_SPDIF_MODE diff --git a/esphome/core/defines.h b/esphome/core/defines.h index 85454d3cc0..162a6034b8 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -72,6 +72,7 @@ #define USE_GRAPHICAL_DISPLAY_MENU #define USE_HOMEASSISTANT_TIME #define USE_HTTP_REQUEST_OTA_WATCHDOG_TIMEOUT 8000 // NOLINT +#define USE_I2S_AUDIO_SPDIF_MODE #define USE_IMAGE #define USE_IMPROV_SERIAL #define USE_IMPROV_SERIAL_NEXT_URL diff --git a/tests/components/speaker/spdif_mode.esp32-idf.yaml b/tests/components/speaker/spdif_mode.esp32-idf.yaml new file mode 100644 index 0000000000..4d6859feae --- /dev/null +++ b/tests/components/speaker/spdif_mode.esp32-idf.yaml @@ -0,0 +1,25 @@ +substitutions: + i2s_bclk_pin: GPIO27 + i2s_lrclk_pin: GPIO26 + i2s_mclk_pin: GPIO25 + i2s_dout_pin: GPIO12 + spdif_data_pin: GPIO4 + +packages: + i2c: !include ../../test_build_components/common/i2c/esp32-idf.yaml + +i2s_audio: + - id: i2s_output + +speaker: + - platform: i2s_audio + id: speaker_id + dac_type: external + i2s_dout_pin: ${spdif_data_pin} + spdif_mode: true + use_apll: true + timeout: 2s + sample_rate: 48000 + bits_per_sample: 16bit + channel: stereo + i2s_mode: primary