diff --git a/esphome/components/i2s_audio/__init__.py b/esphome/components/i2s_audio/__init__.py
index ffa63f5ee8..951b8c0498 100644
--- a/esphome/components/i2s_audio/__init__.py
+++ b/esphome/components/i2s_audio/__init__.py
@@ -201,7 +201,7 @@ async def register_i2s_audio_component(var, config):
 CONFIG_SCHEMA = cv.Schema(
     {
         cv.GenerateID(): cv.declare_id(I2SAudioComponent),
-        cv.Required(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number,
+        cv.Optional(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number,
         cv.Optional(CONF_I2S_BCLK_PIN): pins.internal_gpio_output_pin_number,
         cv.Optional(CONF_I2S_MCLK_PIN): pins.internal_gpio_output_pin_number,
     },
@@ -290,7 +290,8 @@ async def to_code(config):
     # Helps avoid callbacks being skipped due to processor load
     add_idf_sdkconfig_option("CONFIG_I2S_ISR_IRAM_SAFE", True)
 
-    cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN]))
+    if CONF_I2S_LRCLK_PIN in config:
+        cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN]))
     if CONF_I2S_BCLK_PIN in config:
         cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN]))
     if CONF_I2S_MCLK_PIN in config:
diff --git a/esphome/components/i2s_audio/speaker/__init__.py b/esphome/components/i2s_audio/speaker/__init__.py
index 99aa712c68..759cc40ca9 100644
--- a/esphome/components/i2s_audio/speaker/__init__.py
+++ b/esphome/components/i2s_audio/speaker/__init__.py
@@ -18,10 +18,12 @@ from .. import (
     CONF_I2S_DOUT_PIN,
     CONF_I2S_MODE,
     CONF_LEFT,
+    CONF_MCLK_MULTIPLE,
     CONF_MONO,
     CONF_PRIMARY,
     CONF_RIGHT,
     CONF_STEREO,
+    CONF_USE_APLL,
     I2SAudioOut,
     i2s_audio_component_schema,
     i2s_audio_ns,
@@ -40,6 +42,10 @@ I2SAudioSpeaker = i2s_audio_ns.class_("I2SAudioSpeaker", I2SAudioSpeakerBase)
 
 CONF_DAC_TYPE = "dac_type"
 CONF_I2S_COMM_FMT = "i2s_comm_fmt"
+CONF_SPDIF_MODE = "spdif_mode"
+
+# C++ class generated instead of I2SAudioSpeaker when `spdif_mode` is enabled.
+I2SAudioSpeakerSPDIF = i2s_audio_ns.class_("I2SAudioSpeakerSPDIF", I2SAudioSpeakerBase)
 
 I2SCommFmt = i2s_audio_ns.enum("I2SCommFmt", is_class=True)
 
@@ -77,7 +88,17 @@ def _set_num_channels_from_config(config):
 
 
 def _set_stream_limits(config):
-    if config[CONF_I2S_MODE] == CONF_PRIMARY:
+    if config.get(CONF_SPDIF_MODE, False):
+        # SPDIF mode: fixed to 16-bit stereo at configured sample rate
+        audio.set_stream_limits(
+            min_bits_per_sample=16,
+            max_bits_per_sample=16,
+            min_channels=2,
+            max_channels=2,
+            min_sample_rate=config.get(CONF_SAMPLE_RATE),
+            max_sample_rate=config.get(CONF_SAMPLE_RATE),
+        )(config)
+    elif config[CONF_I2S_MODE] == CONF_PRIMARY:
         # Primary mode has modifiable stream settings
         audio.set_stream_limits(
             min_bits_per_sample=8,
@@ -101,6 +122,13 @@ def _set_stream_limits(config):
     return config
 
 
+def _select_speaker_class(config):
+    """Swap the declared ID type to the SPDIF speaker class when `spdif_mode` is set."""
+    if config.get(CONF_SPDIF_MODE, False):
+        config[CONF_ID].type = I2SAudioSpeakerSPDIF
+    return config
+
+
 def _validate_esp32_variant(config):
     variant = esp32.get_esp32_variant()
     if config[CONF_DAC_TYPE] == "internal":
@@ -155,6 +183,7 @@ CONFIG_SCHEMA = cv.All(
                     cv.Optional(CONF_I2S_COMM_FMT, default="stand_i2s"): cv.one_of(
                         *I2C_COMM_FMT_OPTIONS, lower=True
                     ),
+                    cv.Optional(CONF_SPDIF_MODE, default=False): cv.boolean,
                 }
             ),
         },
@@ -163,6 +192,7 @@ CONFIG_SCHEMA = cv.All(
     _validate_esp32_variant,
     _set_num_channels_from_config,
     _set_stream_limits,
+    _select_speaker_class,
     validate_mclk_divisible_by_3,
 )
 
@@ -175,6 +205,28 @@ def _final_validate(config):
     if config[CONF_I2S_COMM_FMT] == "stand_max":
         raise cv.Invalid("I2S standard max format is no longer supported.")
 
+    if config.get(CONF_SPDIF_MODE, False):
+        # SPDIF mode specific validations
+        if config[CONF_SAMPLE_RATE] not in [44100, 48000]:
+            raise cv.Invalid(
+                "SPDIF mode only supports 44100 Hz or 48000 Hz sample rates"
+            )
+        if config[CONF_CHANNEL] != CONF_STEREO:
+            raise cv.Invalid("SPDIF mode only supports stereo channel configuration")
+        # bits_per_sample is converted to float by the schema
+        if config[CONF_BITS_PER_SAMPLE] != 16:
+            raise cv.Invalid("SPDIF mode only supports 16 bits per sample")
+        if not config[CONF_USE_APLL]:
+            raise cv.Invalid(
+                "SPDIF mode requires 'use_apll: true' for accurate clock generation"
+            )
+        if config[CONF_I2S_MODE] != CONF_PRIMARY:
+            raise cv.Invalid("SPDIF mode requires 'i2s_mode: primary'")
+        if config[CONF_I2S_COMM_FMT] != "stand_i2s":
+            raise cv.Invalid("SPDIF mode requires 'i2s_comm_fmt: stand_i2s'")
+        if config[CONF_MCLK_MULTIPLE] != 256:
+            raise cv.Invalid("SPDIF mode requires 'mclk_multiple: 256'")
+
 
 FINAL_VALIDATE_SCHEMA = _final_validate
 
@@ -186,12 +238,18 @@ async def to_code(config):
     await speaker.register_speaker(var, config)
 
     cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN]))
-    fmt = I2SCommFmt.STANDARD  # equals stand_i2s, stand_pcm_long, i2s_msb, pcm_long
-    if config[CONF_I2S_COMM_FMT] in ["stand_msb", "i2s_lsb"]:
-        fmt = I2SCommFmt.MSB
-    elif config[CONF_I2S_COMM_FMT] in ["stand_pcm_short", "pcm_short", "pcm"]:
-        fmt = I2SCommFmt.PCM
-    cg.add(var.set_i2s_comm_fmt(fmt))
+
+    is_spdif = config.get(CONF_SPDIF_MODE, False)
+    if is_spdif:
+        cg.add_define("USE_I2S_AUDIO_SPDIF_MODE")
+    else:
+        fmt = I2SCommFmt.STANDARD  # equals stand_i2s, stand_pcm_long, i2s_msb, pcm_long
+        if config[CONF_I2S_COMM_FMT] in ["stand_msb", "i2s_lsb"]:
+            fmt = I2SCommFmt.MSB
+        elif config[CONF_I2S_COMM_FMT] in ["stand_pcm_short", "pcm_short", "pcm"]:
+            fmt = I2SCommFmt.PCM
+        cg.add(var.set_i2s_comm_fmt(fmt))
+
     if config[CONF_TIMEOUT] != CONF_NEVER:
         cg.add(var.set_timeout(config[CONF_TIMEOUT]))
     cg.add(var.set_buffer_duration(config[CONF_BUFFER_DURATION]))
diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp
new file mode 100644
index 0000000000..e2146de63c
--- /dev/null
+++ b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.cpp
@@ -0,0 +1,629 @@
+#include "i2s_audio_spdif.h"
+
+#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE)
+
+#include <driver/i2s_std.h>
+
+#include "esphome/components/audio/audio.h"
+#include "esphome/components/audio/audio_transfer_buffer.h"
+
+#include "esphome/core/hal.h"
+#include "esphome/core/log.h"
+
+#include "esp_timer.h"
+
+namespace esphome::i2s_audio {
+
+static const char *const TAG = "i2s_audio.spdif";
+
+// SPDIF mode adds overhead as each sample is encapsulated in a subframe;
+// each DMA buffer holds only one 192-frame SPDIF block (~4ms each vs. ~15ms for standard I2S).
+// To match the standard I2S buffering duration, we use more buffers to minimize
+// the impact of the overhead, such as stuttering or audio/silence oscillation.
+// 15 buffers x 4ms = 60ms of DMA buffering (same as 4 x 15ms for standard)
+static constexpr size_t SPDIF_DMA_BUFFERS_COUNT = 15;
+
+// Flush pending frames upstream once this long has passed since the last upstream callback.
+static constexpr uint32_t SPDIF_FLUSH_TIMEOUT_MS = 20;
+
+// Number of DMA events between upstream callbacks (~16ms = 4 events x 4ms each).
+// Matches non-SPDIF timing to prevent overwhelming upstream sync algorithms.
+static constexpr uint32_t SPDIF_DMA_EVENTS_PER_CALLBACK = 4;
+
+// Consider TX stalled only if no DMA callbacks have arrived for this long.
+// Zero-block non-blocking writes alone are not sufficient (they can happen when DMA is simply full).
+static constexpr uint32_t SPDIF_STALL_NO_DMA_MS = 80;
+
+// Fallback stall detector: force recovery if silence writes make no forward progress for too long,
+// even if occasional DMA callbacks are still observed.
+static constexpr uint32_t SPDIF_STALL_ZERO_PROGRESS_MS = 1000;
+
+// Minimum spacing between re-prime attempts to avoid churn.
+static constexpr uint32_t SPDIF_REPRIME_COOLDOWN_MS = 500;
+
+// Small waits used in SPDIF mode to keep DMA fed during rapid pipeline churn.
+static constexpr uint32_t SPDIF_EMPTY_READ_DELAY_MS = 1;
+static constexpr uint32_t SPDIF_SILENCE_LOOP_DELAY_MS = 1;
+static constexpr uint32_t SPDIF_PLAY_RETRY_WAIT_MS = 5;
+
+static constexpr size_t SPDIF_I2S_EVENT_QUEUE_COUNT = SPDIF_DMA_BUFFERS_COUNT + 1;  // Passed to init_i2s_channel_
+
+// Static silence buffer for SPDIF continuous mode
+// 192 samples * 2 channels * 2 bytes per sample = 768 bytes
+// Declared static const so it needs no stack/heap allocation (expected to be placed in .rodata)
+static const int16_t SPDIF_SILENCE_BUFFER[SPDIF_BLOCK_SAMPLES * 2] = {0};
+
+// SPDIF encoder callbacks are plain functions (no std::function overhead). Preload variant: ticks_to_wait is unused.
+static esp_err_t spdif_preload_cb(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait) {
+  auto *speaker = static_cast<I2SAudioSpeakerSPDIF *>(user_ctx);
+  size_t bytes_written = 0;
+  esp_err_t err = i2s_channel_preload_data(speaker->get_tx_handle(), data, size, &bytes_written);
+  if (err != ESP_OK || bytes_written != size) {
+    ESP_LOGW(TAG, "Preload failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size);
+    return (err != ESP_OK) ? err : ESP_ERR_NO_MEM;  // A short write with ESP_OK is reported as no-mem
+  }
+  return ESP_OK;
+}
+
+static esp_err_t spdif_write_cb(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait) {
+  auto *speaker = static_cast<I2SAudioSpeakerSPDIF *>(user_ctx);
+  size_t bytes_written = 0;
+  esp_err_t err = i2s_channel_write(speaker->get_tx_handle(), data, size, &bytes_written, ticks_to_wait);
+  // ESP_ERR_TIMEOUT is expected under DMA backpressure, so it is not logged; every error is still returned.
+  if (err != ESP_OK && err != ESP_ERR_TIMEOUT) {
+    ESP_LOGW(TAG, "I2S write failed: %s (wrote %zu/%zu bytes)", esp_err_to_name(err), bytes_written, size);
+  }
+  return err;
+}
+
+void I2SAudioSpeakerSPDIF::setup() {
+  I2SAudioSpeakerBase::setup();
+  if (this->is_failed()) {
+    return;
+  }
+
+  this->spdif_encoder_ = new SPDIFEncoder();  // NOTE(review): not released on the failure path below
+  if (!this->spdif_encoder_->setup()) {
+    ESP_LOGE(TAG, "Encoder setup failed");
+    this->mark_failed();
+    return;
+  }
+
+  // Pass the configured sample rate to the encoder (presumably for the channel status block - confirm)
+  this->spdif_encoder_->set_sample_rate(this->sample_rate_);
+
+  // Separate callbacks for preloading (task start / stall re-prime) and for normal writes
+  this->spdif_encoder_->set_preload_callback(spdif_preload_cb, this);
+  this->spdif_encoder_->set_write_callback(spdif_write_cb, this);
+}
+
+void I2SAudioSpeakerSPDIF::dump_config() {  // Base speaker config first, then the SPDIF-specific lines
+  I2SAudioSpeakerBase::dump_config();
+  ESP_LOGCONFIG(TAG,
+                "  SPDIF Mode: YES\n"
+                "  Sample Rate: %" PRIu32 " Hz",
+                this->sample_rate_);
+}
+
+void I2SAudioSpeakerSPDIF::on_task_stopped() { this->spdif_silence_start_ = 0; }  // 0 = not in silence
+
+size_t I2SAudioSpeakerSPDIF::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
+  if (this->is_failed()) {
+    ESP_LOGE(TAG, "Setup failed; cannot play audio");
+    return 0;
+  }
+
+  // task_active is sampled before start() below: audio keeps being accepted while a speaker task exists,
+  // which avoids transient drops during stop/start transitions.
+  const bool task_active = (this->speaker_task_handle_ != nullptr);
+
+  if (this->state_ != speaker::STATE_RUNNING && this->state_ != speaker::STATE_STARTING) {
+    this->start();
+  }
+
+  if (!task_active && this->state_ != speaker::STATE_RUNNING) {
+    // Neither a task nor RUNNING state yet: use up the caller's whole wait so the speaker can get ready
+    vTaskDelay(ticks_to_wait);
+    ticks_to_wait = 0;
+  }
+
+  size_t bytes_written = 0;
+  if (this->state_ == speaker::STATE_RUNNING || task_active) {
+    std::shared_ptr<ring_buffer::RingBuffer> temp_ring_buffer = this->audio_ring_buffer_.lock();
+    if (temp_ring_buffer != nullptr) {
+      // A zero wait is bumped to 1 ms; this helps avoid transient 0-byte writes during short backpressure windows.
+      TickType_t effective_ticks_to_wait = ticks_to_wait;
+      if (effective_ticks_to_wait == 0) {
+        effective_ticks_to_wait = pdMS_TO_TICKS(1);
+      }
+      bytes_written = temp_ring_buffer->write_without_replacement((void *) data, length, effective_ticks_to_wait);
+      if (bytes_written == 0 && length > 0) {
+        // Retry once with a SPDIF_PLAY_RETRY_WAIT_MS wait to catch short free-space windows during seeks.
+        bytes_written =
+            temp_ring_buffer->write_without_replacement((void *) data, length, pdMS_TO_TICKS(SPDIF_PLAY_RETRY_WAIT_MS));
+      }
+    }
+  }
+
+  return bytes_written;
+}
+
+void I2SAudioSpeakerSPDIF::run_speaker_task() {  // Task body: ring buffer -> SPDIF encoder -> I2S DMA
+  xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_STARTING);
+
+  // Reset SPDIF encoder at task start to ensure clean state
+  // (a previous task may have left stale data). NOTE(review): later uses do not repeat this null check.
+  if (this->spdif_encoder_ != nullptr) {
+    this->spdif_encoder_->reset();
+  }
+
+  const uint32_t dma_buffers_duration_ms = DMA_BUFFER_DURATION_MS * SPDIF_DMA_BUFFERS_COUNT;
+  // Ensure ring buffer duration is at least the duration of all DMA buffers
+  const uint32_t ring_buffer_duration = std::max(dma_buffers_duration_ms, this->buffer_duration_ms_);
+
+  // The DMA buffers may have more bits per sample, so calculate buffer sizes based on the input audio stream info
+  const size_t ring_buffer_size = this->current_stream_info_.ms_to_bytes(ring_buffer_duration);
+
+  // For SPDIF mode, one DMA buffer = one SPDIF block = 192 PCM frames
+  const uint32_t frames_to_fill_single_dma_buffer = SPDIF_BLOCK_SAMPLES;
+  const size_t bytes_to_fill_single_dma_buffer =
+      this->current_stream_info_.frames_to_bytes(frames_to_fill_single_dma_buffer);
+
+  bool successful_setup = false;
+  std::unique_ptr<audio::AudioSourceTransferBuffer> transfer_buffer =
+      audio::AudioSourceTransferBuffer::create(bytes_to_fill_single_dma_buffer);
+
+  if (transfer_buffer != nullptr) {
+    std::shared_ptr<ring_buffer::RingBuffer> temp_ring_buffer = ring_buffer::RingBuffer::create(ring_buffer_size);
+    if (temp_ring_buffer.use_count() == 1) {
+      transfer_buffer->set_source(temp_ring_buffer);
+      this->audio_ring_buffer_ = temp_ring_buffer;
+      successful_setup = true;
+    }
+  }
+
+  if (!successful_setup) {
+    xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM);
+  } else {
+    // Preload DMA buffers with SPDIF-encoded silence before enabling the channel.
+    // This ensures the first data transmitted is valid SPDIF (not raw zeros from
+    // auto_clear) and prevents phantom DMA events before real audio is available.
+    // Track how many buffers were preloaded so the DMA event loop can skip
+    // frame accounting until the preloaded silence has fully drained.
+    uint32_t preload_buffers_remaining = 0;
+    this->spdif_encoder_->set_preload_mode(true);
+    for (size_t i = 0; i < SPDIF_DMA_BUFFERS_COUNT; i++) {
+      uint32_t preload_blocks = 0;
+      esp_err_t preload_err = this->spdif_encoder_->write(reinterpret_cast<const uint8_t *>(SPDIF_SILENCE_BUFFER),
+                                                          sizeof(SPDIF_SILENCE_BUFFER),
+                                                          pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &preload_blocks);
+      if (preload_err != ESP_OK || preload_blocks == 0) {
+        break;  // DMA buffers full or error
+      }
+      preload_buffers_remaining += preload_blocks;
+    }
+    this->spdif_encoder_->set_preload_mode(false);
+    this->spdif_encoder_->reset();  // Clean encoder state for the main loop
+
+    // Now register the callback and enable the channel
+    xQueueReset(this->i2s_event_queue_);
+    const i2s_event_callbacks_t callbacks = {.on_sent = i2s_on_sent_cb};
+    i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this);
+    i2s_channel_enable(this->tx_handle_);
+
+    bool stop_gracefully = false;
+    bool tx_dma_underflow = true;
+
+    uint32_t frames_written = 0;
+
+    // SPDIF Continuous Silence Mode + Callback Decimation
+    //
+    // Key principles:
+    // 1. NEVER stop the I2S channel - always output a valid SPDIF stream
+    // 2. When no audio data, output silence-encoded SPDIF blocks (not zeros!)
+    // 3. Fire callbacks every 4 DMA events (~16ms), matching non-SPDIF timing
+    //
+    // This eliminates gaps that cause SPDIF receivers to re-sync, and reduces
+    // callback rate to prevent overwhelming upstream sync algorithms.
+    const uint32_t spdif_callback_threshold = this->current_stream_info_.ms_to_frames(DMA_BUFFER_DURATION_MS);
+    uint32_t spdif_pending_frames = 0;
+    int64_t spdif_pending_timestamp = 0;
+    uint32_t spdif_last_callback_time = millis();
+    // Count DMA events for decimation
+    uint32_t spdif_dma_event_count = 0;
+    uint32_t spdif_last_dma_event_time = millis();
+    // Detect a stalled DMA path (many silence write attempts with zero accepted blocks).
+    uint32_t spdif_zero_block_streak = 0;
+    uint32_t spdif_last_block_progress_time = millis();
+    uint32_t spdif_last_reprime_time = 0;
+
+    xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_RUNNING);
+
+    // SPDIF continuous mode: loop runs indefinitely, outputting silence when no audio data
+    // to keep the receiver synced. Exits only via break (stream info change or silence timeout).
+    while (true) {
+      uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
+
+      if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) {
+        xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP);
+        // In SPDIF continuous mode, don't tear down or expose STOPPED here.
+        // Keep the task alive and transition to silence output.
+        this->spdif_silence_start_ = millis();
+        ESP_LOGV(TAG, "COMMAND_STOP received, continuing in silence mode");
+      }
+      if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) {
+        xEventGroupClearBits(this->event_group_, SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY);
+        stop_gracefully = true;
+      }
+
+      if (this->audio_stream_info_ != this->current_stream_info_) {
+        // Audio stream info changed, stop the speaker task so it will restart with the proper settings.
+        ESP_LOGV(TAG, "Exiting: stream info changed");
+        break;
+      }
+
+      int64_t write_timestamp;
+      while (xQueueReceive(this->i2s_event_queue_, &write_timestamp, 0)) {
+        spdif_last_dma_event_time = millis();
+
+        // Skip frame accounting for preloaded silence buffers still draining.
+        // These DMA events correspond to silence that was preloaded before the
+        // channel was enabled, not real audio written by the task.
+        if (preload_buffers_remaining > 0) {
+          preload_buffers_remaining--;
+          continue;
+        }
+
+        // Receives timing events from the I2S on_sent callback. If actual audio data was sent in this event, it passes
+        // on the timing info via the audio_output_callback.
+        uint32_t frames_sent = frames_to_fill_single_dma_buffer;
+        if (frames_to_fill_single_dma_buffer > frames_written) {
+          tx_dma_underflow = true;
+          frames_sent = frames_written;
+          const uint32_t frames_zeroed = frames_to_fill_single_dma_buffer - frames_written;
+          write_timestamp -= this->current_stream_info_.frames_to_microseconds(frames_zeroed);
+        } else {
+          tx_dma_underflow = false;
+        }
+        frames_written -= frames_sent;
+
+        // SPDIF Callback Decimation: fire every 4th DMA event (~16ms)
+        // This matches non-SPDIF timing and prevents overwhelming upstream.
+        if (spdif_callback_threshold > 0) {
+          spdif_dma_event_count++;
+
+          // Accumulate frames; always keep the latest timestamp so the
+          // callback reports when the last sample left the wire, not the first.
+          if (frames_sent > 0) {
+            spdif_pending_timestamp = write_timestamp;
+            spdif_pending_frames += frames_sent;
+          }
+
+          // Fire callback every 4 DMA events, or on timeout if we have pending frames
+          bool decimation_reached = (spdif_dma_event_count >= SPDIF_DMA_EVENTS_PER_CALLBACK);
+          bool timeout_flush =
+              (spdif_pending_frames > 0) && ((millis() - spdif_last_callback_time) >= SPDIF_FLUSH_TIMEOUT_MS);
+
+          if (decimation_reached || timeout_flush) {
+            if (spdif_pending_frames > 0) {
+              this->audio_output_callback_(spdif_pending_frames, spdif_pending_timestamp);
+              spdif_pending_frames = 0;
+              spdif_last_callback_time = millis();
+            }
+            spdif_dma_event_count = 0;  // Reset decimation counter
+          }
+        }
+      }
+
+      if (this->pause_state_) {
+        // pause_state_ is assumed to be atomic (declared outside this file - confirm).
+        // Delay so the task yields, then skip the transfer; note no silence blocks are written while paused.
+        vTaskDelay(pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS));
+        continue;
+      }
+
+      // Wait half the duration of the data already written to the DMA buffers for new audio data
+      // The millisecond helper modifies the frames_written variable, so use the microsecond helper and divide by 1000
+      uint32_t read_delay = (this->current_stream_info_.frames_to_microseconds(frames_written) / 1000) / 2;
+
+      // In SPDIF mode, if transfer buffer is empty (we're pumping silence), use a very short timeout.
+      // This ensures we can pump silence fast enough to keep the DMA fed (~250 blocks/sec needed).
+      // Otherwise the long timeout based on frames_written causes DMA to run dry.
+      if (transfer_buffer->available() == 0) {
+        read_delay = SPDIF_EMPTY_READ_DELAY_MS;
+      }
+
+      size_t bytes_read = transfer_buffer->transfer_data_from_source(pdMS_TO_TICKS(read_delay));
+      uint8_t *new_data = transfer_buffer->get_buffer_end() - bytes_read;
+
+      if (bytes_read > 0) {
+        this->apply_software_volume_(new_data, bytes_read);
+        this->swap_esp32_mono_samples_(new_data, bytes_read);
+      }
+
+      if (transfer_buffer->available() == 0) {
+        // SPDIF Continuous Silence Mode: always output valid SPDIF stream
+        // When no audio data, write silence-encoded blocks to keep receiver happy
+        if (this->spdif_encoder_ != nullptr) {
+          // "Graceful stop" means "drain buffered audio, then stop." In SPDIF
+          // continuous mode we never actually stop, so once audio is drained
+          // (we're here), reset the flag to re-enable silence writing and stall
+          // recovery. Without this, stop_gracefully stays true forever and
+          // blocks silence output, causing DMA to degrade on auto_clear zeros.
+          stop_gracefully = false;
+
+          // Track when we entered silence mode
+          if (this->spdif_silence_start_ == 0) {
+            this->spdif_silence_start_ = millis();
+          }
+
+          // If silence persists past the configured timeout, stop the task
+          // so components expecting timeout semantics can recover.
+          if (this->timeout_.has_value()) {
+            const uint32_t silence_duration = millis() - this->spdif_silence_start_;
+            if (silence_duration >= this->timeout_.value()) {
+              ESP_LOGV(TAG, "Silence timeout reached (%" PRIu32 "ms) - stopping speaker", silence_duration);
+              break;
+            }
+          }
+
+          // First flush any partial block with silence padding (non-blocking to avoid getting stuck).
+          // IMPORTANT: Credit any partial block frames to frames_written so the audio_output_callback_
+          // fires for them. Without this, pending_playback_frames_ in the mixer's SourceSpeaker never
+          // reaches 0 when a stream ends on a non-192-frame boundary, permanently blocking teardown.
+          if (this->spdif_encoder_->has_pending_data()) {
+            uint32_t partial_frames = this->spdif_encoder_->get_pending_frames();
+            // Use a tiny timeout to allow DMA queue progress without stalling the task.
+            esp_err_t flush_err = this->spdif_encoder_->flush_with_silence(pdMS_TO_TICKS(1));
+            if (flush_err == ESP_OK && partial_frames > 0) {
+              frames_written += partial_frames;
+            }
+          }
+
+          // CRITICAL: In SPDIF continuous mode, ALWAYS write silence when no audio data.
+          // We don't check tx_dma_underflow because:
+          // 1. When DMA runs empty, callbacks stop, so tx_dma_underflow doesn't update
+          // 2. The short-timeout write handles "DMA full" gracefully (just doesn't write)
+          // 3. We need continuous output to prevent receiver from losing sync
+          if (!stop_gracefully) {  // Always true here: the flag was cleared above
+            uint32_t silence_blocks = 0;
+            esp_err_t write_err = this->spdif_encoder_->write(
+                reinterpret_cast<const uint8_t *>(SPDIF_SILENCE_BUFFER), sizeof(SPDIF_SILENCE_BUFFER), pdMS_TO_TICKS(1),
+                &silence_blocks);  // Waits at most ~1 ms
+                                   // Don't count silence as frames_written - it's not real audio
+
+            // Recovery path for a stalled SPDIF TX channel:
+            // if silence writes repeatedly produce zero blocks AND DMA callbacks have stopped,
+            // re-prime DMA using preload mode.
+            const uint32_t ms_since_dma = millis() - spdif_last_dma_event_time;
+            const bool dma_events_stalled = ms_since_dma >= SPDIF_STALL_NO_DMA_MS;
+            if (silence_blocks > 0) {
+              spdif_last_block_progress_time = millis();
+            }
+            const bool long_zero_progress = (millis() - spdif_last_block_progress_time) >= SPDIF_STALL_ZERO_PROGRESS_MS;
+            if (dma_events_stalled && silence_blocks == 0 && (write_err == ESP_OK || write_err == ESP_ERR_TIMEOUT)) {
+              spdif_zero_block_streak++;
+            } else {
+              spdif_zero_block_streak = 0;
+            }
+
+            const uint32_t now_ms = millis();
+            const bool reprime_cooldown_elapsed =
+                (spdif_last_reprime_time == 0) || ((now_ms - spdif_last_reprime_time) >= SPDIF_REPRIME_COOLDOWN_MS);
+
+            if ((spdif_zero_block_streak >= 100 || long_zero_progress) && reprime_cooldown_elapsed) {
+              ESP_LOGV(TAG, "TX appears stalled, attempting DMA re-prime");
+
+              i2s_channel_disable(this->tx_handle_);
+
+              const i2s_event_callbacks_t null_callbacks = {.on_sent = nullptr};
+              i2s_channel_register_event_callback(this->tx_handle_, &null_callbacks, this);
+
+              this->spdif_encoder_->set_preload_mode(true);
+              uint32_t preload_blocks = 0;
+              esp_err_t preload_err = this->spdif_encoder_->write(
+                  reinterpret_cast<const uint8_t *>(SPDIF_SILENCE_BUFFER), sizeof(SPDIF_SILENCE_BUFFER),
+                  pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &preload_blocks);
+              this->spdif_encoder_->set_preload_mode(false);
+
+              xQueueReset(this->i2s_event_queue_);
+              const i2s_event_callbacks_t callbacks = {.on_sent = i2s_on_sent_cb};
+              i2s_channel_register_event_callback(this->tx_handle_, &callbacks, this);
+              i2s_channel_enable(this->tx_handle_);
+
+              if (preload_err == ESP_OK && preload_blocks > 0) {
+                tx_dma_underflow = false;
+                preload_buffers_remaining = preload_blocks;
+                frames_written = 0;  // Stale after channel disable/enable cycle
+                ESP_LOGV(TAG, "DMA re-prime successful (%" PRIu32 " preload blocks)", preload_blocks);
+                spdif_last_block_progress_time = now_ms;
+              } else {
+                ESP_LOGW(TAG, "DMA re-prime failed (%s, blocks=%" PRIu32 ")", esp_err_to_name(preload_err),
+                         preload_blocks);
+              }
+              spdif_last_reprime_time = now_ms;
+              spdif_zero_block_streak = 0;
+            }
+          }
+        }
+
+        if (stop_gracefully && tx_dma_underflow) {
+          // In SPDIF continuous mode, don't break on graceful stop during silence
+          // Keep outputting silence until new audio arrives or explicit COMMAND_STOP
+          // (handled above which transitions to silence mode rather than breaking)
+        }
+
+        // In SPDIF mode, use a shorter delay to pump silence faster
+        // We need ~250 blocks/sec to keep DMA fed, so max 4ms per iteration
+        vTaskDelay(pdMS_TO_TICKS(SPDIF_SILENCE_LOOP_DELAY_MS));
+      } else {
+        // Have audio data to write
+        size_t bytes_written = 0;
+
+        // Clear silence timer since we have audio data now
+        if (this->spdif_silence_start_ != 0) {
+          uint32_t silence_duration = millis() - this->spdif_silence_start_;
+          if (silence_duration > 100) {
+            ESP_LOGV(TAG, "Exiting silence mode after %" PRIu32 "ms, have audio data", silence_duration);
+          }
+          this->spdif_silence_start_ = 0;
+        }
+
+        {
+          uint32_t blocks_sent = 0;
+          size_t pcm_bytes_consumed = 0;
+
+          // Write audio data to encoder (which writes to DMA)
+          esp_err_t err =
+              this->spdif_encoder_->write(transfer_buffer->get_buffer_start(), transfer_buffer->available(),
+                                          pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS), &blocks_sent, &pcm_bytes_consumed);
+          if (err != ESP_OK && err != ESP_ERR_TIMEOUT) {
+            ESP_LOGW(TAG, "Write failed: %s", esp_err_to_name(err));
+          }
+
+          // Only consume source bytes that were actually accepted by the encoder.
+          bytes_written = pcm_bytes_consumed;
+
+          // Update frame accounting based on complete blocks sent (192 frames per block)
+          if (bytes_written > 0) {
+            frames_written += blocks_sent * SPDIF_BLOCK_SAMPLES;
+            transfer_buffer->decrease_buffer_length(bytes_written);
+            // Audio blocks count as DMA progress for the stall detector.
+            // Without this, a long uninterrupted audio stream makes the
+            // progress timer stale, triggering a spurious re-prime the
+            // instant we transition to silence.
+            spdif_last_block_progress_time = millis();
+          }
+        }
+      }
+    }
+    // Reaching here means the while (true) loop above was left via break.
+    // In SPDIF mode, loop exit is expected when:
+    // 1. Timeout reached (user configured timeout)
+    // 2. Stream info changed
+    // NOTE(review): with no timeout set, a stream info change still lands here and triggers this warning.
+    if (!this->timeout_.has_value()) {
+      ESP_LOGW(TAG, "Unexpected loop exit; set 'timeout: never' to prevent this");
+    }
+  }
+
+  xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_STOPPING);
+
+  // Reset SPDIF encoder state to prevent stale state on next start
+  if (this->spdif_encoder_ != nullptr) {
+    this->spdif_encoder_->set_preload_mode(false);
+    this->spdif_encoder_->reset();
+  }
+
+  if (transfer_buffer != nullptr) {
+    transfer_buffer.reset();
+  }
+
+  xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::TASK_STOPPED);
+
+  while (true) {
+    // Continuously delay until the loop method deletes the task
+    vTaskDelay(pdMS_TO_TICKS(10));
+  }
+}
+
+esp_err_t I2SAudioSpeakerSPDIF::start_i2s_driver(audio::AudioStreamInfo &audio_stream_info) {
+  this->current_stream_info_ = audio_stream_info;  // Stored even if the validation below rejects the stream
+
+  // SPDIF mode validation: stream must match the configured sample rate and be 16-bit stereo
+  if (this->sample_rate_ != audio_stream_info.get_sample_rate()) {
+    ESP_LOGE(TAG, "Only supports a single sample rate (configured: %" PRIu32 " Hz, stream: %" PRIu32 " Hz)",
+             this->sample_rate_, audio_stream_info.get_sample_rate());
+    return ESP_ERR_NOT_SUPPORTED;
+  }
+  if (audio_stream_info.get_bits_per_sample() != 16) {
+    ESP_LOGE(TAG, "Only supports 16 bits per sample");
+    return ESP_ERR_NOT_SUPPORTED;
+  }
+  if (audio_stream_info.get_channels() != 2) {
+    ESP_LOGE(TAG, "Only supports stereo (2 channels)");
+    return ESP_ERR_NOT_SUPPORTED;
+  }
+
+  if (this->slot_bit_width_ != I2S_SLOT_BIT_WIDTH_AUTO &&
+      (i2s_slot_bit_width_t) audio_stream_info.get_bits_per_sample() > this->slot_bit_width_) {
+    ESP_LOGE(TAG, "Stream bits per sample must be less than or equal to the speaker's configuration");
+    return ESP_ERR_NOT_SUPPORTED;
+  }
+
+  if (!this->parent_->try_lock()) {
+    ESP_LOGE(TAG, "Parent bus is busy");
+    return ESP_ERR_INVALID_STATE;
+  }
+
+  i2s_clock_src_t clk_src = I2S_CLK_SRC_DEFAULT;
+
+#if SOC_CLK_APLL_SUPPORTED
+  if (this->use_apll_) {
+    clk_src = i2s_clock_src_t::I2S_CLK_SRC_APLL;
+  }
+#endif  // SOC_CLK_APLL_SUPPORTED
+
+  // SPDIF mode: fixed configuration for BMC encoding
+  // For new driver, dma_frame_num is in I2S frames (8 bytes each for 32-bit stereo)
+  uint32_t dma_buffer_length = SPDIF_BLOCK_I2S_FRAMES;  // One SPDIF block = 384 I2S frames = 3072 bytes
+
+  // Log DMA configuration for debugging
+  ESP_LOGV(TAG, "I2S DMA config: %zu buffers x %lu frames = %lu bytes total", (size_t) SPDIF_DMA_BUFFERS_COUNT,
+           (unsigned long) dma_buffer_length,
+           (unsigned long) (SPDIF_DMA_BUFFERS_COUNT * dma_buffer_length * 8));  // 8 bytes per frame for 32-bit stereo
+
+  i2s_chan_config_t chan_cfg = {
+      .id = this->parent_->get_port(),
+      .role = this->i2s_role_,
+      .dma_desc_num = SPDIF_DMA_BUFFERS_COUNT,
+      .dma_frame_num = dma_buffer_length,
+      .auto_clear = true,
+      .intr_priority = 3,
+  };
+
+  // SPDIF: double sample rate for BMC, 32-bit stereo, only data pin needed
+  i2s_std_clk_config_t clk_cfg = {
+      .sample_rate_hz = this->sample_rate_ * 2,
+      .clk_src = clk_src,
+      .mclk_multiple = this->mclk_multiple_,
+  };
+
+  i2s_std_slot_config_t slot_cfg = I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_32BIT, I2S_SLOT_MODE_STEREO);
+
+  i2s_std_gpio_config_t gpio_cfg = {
+      .mclk = GPIO_NUM_NC,
+      .bclk = GPIO_NUM_NC,
+      .ws = GPIO_NUM_NC,
+      .dout = this->dout_pin_,
+      .din = GPIO_NUM_NC,
+      .invert_flags =
+          {
+              .mclk_inv = false,
+              .bclk_inv = false,
+              .ws_inv = false,
+          },
+  };
+
+  i2s_std_config_t std_cfg = {
+      .clk_cfg = clk_cfg,
+      .slot_cfg = slot_cfg,
+      .gpio_cfg = gpio_cfg,
+  };
+
+  esp_err_t err = this->init_i2s_channel_(chan_cfg, std_cfg, SPDIF_I2S_EVENT_QUEUE_COUNT);
+  if (err != ESP_OK) {
+    return err;
+  }
+
+  // Channel is NOT enabled here. The speaker task will preload DMA buffers
+  // with SPDIF-encoded silence before enabling, ensuring the first data on
+  // the wire is valid SPDIF (not raw zeros from auto_clear) and preventing
+  // phantom DMA events before real audio data is available.
+
+  return ESP_OK;
+}
+
+}  // namespace esphome::i2s_audio
+
+#endif  // USE_ESP32 && USE_I2S_AUDIO_SPDIF_MODE
diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_spdif.h b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.h
new file mode 100644
index 0000000000..ca7774123b
--- /dev/null
+++ b/esphome/components/i2s_audio/speaker/i2s_audio_spdif.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "esphome/core/defines.h"
+
+#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE)
+
+#include "i2s_audio_speaker.h"
+#include "spdif_encoder.h"
+
+namespace esphome::i2s_audio {
+
+/// @brief SPDIF speaker implementation.
+/// Encodes PCM audio into an IEC 60958-3 S/PDIF bitstream using BMC encoding,
+/// outputting through a single I2S data pin. Maintains continuous output
+/// (silence when no audio) to keep SPDIF receivers synchronized.
+class I2SAudioSpeakerSPDIF : public I2SAudioSpeakerBase {
+ public:
+  void setup() override;
+  void dump_config() override;
+
+  size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;
+
+ protected:
+  void run_speaker_task() override;
+  esp_err_t start_i2s_driver(audio::AudioStreamInfo &audio_stream_info) override;
+  void on_task_stopped() override;
+
+  SPDIFEncoder *spdif_encoder_{nullptr};  // Encoder producing the S/PDIF bitstream; starts out null
+  uint32_t spdif_silence_start_{0};  // Timestamp when silence mode started (0 = not in silence)
+};
+
+}  // namespace esphome::i2s_audio
+
+#endif  // USE_ESP32 && USE_I2S_AUDIO_SPDIF_MODE
diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp
index a71b7db3ba..f34839a314 100644
--- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp
+++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp
@@ -257,7 +257,7 @@ esp_err_t I2SAudioSpeakerBase::init_i2s_channel_(const i2s_chan_config_t &chan_c
 
   err = i2s_channel_init_std_mode(this->tx_handle_, &std_cfg);
   if (err != ESP_OK) {
-    ESP_LOGE(TAG, "Failed to initialize channel");
+    ESP_LOGE(TAG, "Failed to initialize I2S channel");
     i2s_del_channel(this->tx_handle_);
     this->tx_handle_ = nullptr;
     this->parent_->unlock();
diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h
index c598ca1bf8..bfde455c75 100644
--- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h
+++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.h
@@ -18,7 +18,7 @@
 
 namespace esphome::i2s_audio {
 
-// Shared constants for I2S audio speaker implementations
+// Shared constants used by both standard and SPDIF speaker implementations
 static constexpr uint32_t DMA_BUFFER_DURATION_MS = 15;
 static constexpr size_t TASK_STACK_SIZE = 4096;
 static constexpr ssize_t TASK_PRIORITY = 19;
@@ -42,7 +42,7 @@ enum SpeakerEventGroupBits : uint32_t {
 
 /// @brief Abstract base class for I2S audio speaker implementations.
 /// Provides shared infrastructure (event groups, ring buffer, volume control, task lifecycle)
-/// for derived I2S speaker classes.
+/// for derived standard I2S and SPDIF speaker classes.
 class I2SAudioSpeakerBase : public I2SAudioOut, public speaker::Speaker, public Component {
  public:
   float get_setup_priority() const override { return esphome::setup_priority::PROCESSOR; }
diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.cpp b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp
new file mode 100644
index 0000000000..a853f934bb
--- /dev/null
+++ b/esphome/components/i2s_audio/speaker/spdif_encoder.cpp
@@ -0,0 +1,385 @@
+#include "spdif_encoder.h"
+
+#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE)
+#include <new>
+#include "esphome/core/log.h"
+
+namespace esphome::i2s_audio {
+
+static const char *const TAG = "i2s_audio.spdif_encoder";
+
+// S/PDIF preamble patterns (8 BMC bits each)
+// These are the BMC-encoded sync patterns that violate normal BMC rules for easy detection.
+// All preambles end at phase HIGH (last bit = 1), enabling consistent data encoding.
+// The preamble is placed at bits 24-31 of word[0] for MSB-first transmission.
+static constexpr uint8_t PREAMBLE_B = 0x17;  // 0b00010111 - Block start (left channel, frame 0)
+static constexpr uint8_t PREAMBLE_M = 0x1d;  // 0b00011101 - Left channel (not block start)
+static constexpr uint8_t PREAMBLE_W = 0x1b;  // 0b00011011 - Right channel
+
+// BMC encoding of 4 zero bits starting at phase HIGH: 00_11_00_11 = 0x33
+// Both aux nibbles (bits 4-7, 8-11) are zero for 16-bit audio and four zero bits end at phase HIGH, so both are 0x33.
+static constexpr uint32_t BMC_ZERO_NIBBLE = 0x33;
+
+// Compile-time biphase-mark encoder used to generate the lookup tables.
+// Encoding always starts at phase=true (HIGH); a phase=false start is the bitwise complement,
+// so no second table is needed: bmc_encode(v, N, false) == bmc_encode(v, N, true) ^ mask
+static constexpr uint16_t bmc_lut_encode(uint32_t data, uint8_t num_bits) {
+  uint16_t encoded = 0;
+  bool level_high = true;
+  for (uint8_t idx = 0; idx < num_bits; idx++) {
+    const bool one = ((data >> idx) & 1) != 0;
+    // Every data bit becomes a pair of output bits; data bit 0 lands in the most significant pair
+    const uint8_t pair = one ? (level_high ? 0b01 : 0b10) : (level_high ? 0b00 : 0b11);
+    encoded |= static_cast<uint16_t>(pair) << ((num_bits - 1 - idx) * 2);
+    level_high = (level_high == one);  // a zero bit inverts the running phase, a one bit keeps it
+  }
+  return encoded;
+}
+
+// BMC patterns for every 4-bit value: 16 one-byte entries, indexed by the data value (0-15)
+// and encoded from a phase=true start
+static constexpr auto BMC_LUT_4 = [] {
+  std::array<uint8_t, 16> table{};
+  for (uint32_t value = 0; value < table.size(); ++value)
+    table[value] = static_cast<uint8_t>(bmc_lut_encode(value, 4));
+  return table;
+}();
+
+// BMC patterns for every 8-bit value: 256 two-byte entries (512 bytes), indexed by the data
+// value (0-255) and encoded from a phase=true start
+static constexpr auto BMC_LUT_8 = [] {
+  std::array<uint16_t, 256> table{};
+  for (uint32_t value = 0; value < table.size(); ++value)
+    table[value] = bmc_lut_encode(value, 8);
+  return table;
+}();
+
+// Allocate the S/PDIF block buffer and initialise the encoder state; returns false if the allocation fails.
+bool SPDIFEncoder::setup() {
+  // Use nothrow new: std::make_unique signals failure by throwing/aborting, never by returning null.
+  this->spdif_block_buf_.reset(new (std::nothrow) uint32_t[SPDIF_BLOCK_SIZE_U32]());
+  if (!this->spdif_block_buf_) {
+    ESP_LOGE(TAG, "Buffer allocation failed (%zu bytes)", static_cast<size_t>(SPDIF_BLOCK_SIZE_BYTES));
+    return false;
+  }
+  ESP_LOGV(TAG, "Buffer allocated (%zu bytes)", static_cast<size_t>(SPDIF_BLOCK_SIZE_BYTES));
+
+  // Build the initial channel status block with the default sample rate, then rewind the write position
+  this->build_channel_status_();
+  this->reset();
+  return true;
+}
+
+void SPDIFEncoder::reset() {
+  this->is_left_channel_ = true;                          // next encoded sample is a left subframe
+  this->frame_in_block_ = 0;                              // next frame is frame 0 of a block
+  this->spdif_block_ptr_ = this->spdif_block_buf_.get();  // drop anything encoded so far
+}
+
+void SPDIFEncoder::set_sample_rate(uint32_t sample_rate) {
+  if (this->sample_rate_ == sample_rate)
+    return;  // unchanged rate: keep the existing channel status block
+  this->sample_rate_ = sample_rate;
+  this->build_channel_status_();  // the channel status block encodes the sample rate
+  ESP_LOGD(TAG, "Sample rate set to %lu Hz", (unsigned long) sample_rate);
+}
+
+// Rebuild the IEC 60958-3 consumer channel status block for the current sample rate.
+// Called from setup() to build the initial block and from set_sample_rate() whenever
+// the configured rate changes.
+void SPDIFEncoder::build_channel_status_() {
+  // The block is 192 bits (24 bytes), transmitted LSB-first within each byte,
+  // one bit per frame via the C bit.
+  //
+  // Byte 0 - control bits:
+  //   bit 0:    0 = consumer format (not professional AES3)
+  //   bit 1:    0 = PCM audio (not non-audio data like AC3)
+  //   bit 2:    0 = no copyright assertion
+  //   bits 3-5: 000 = no pre-emphasis
+  //   bits 6-7: 00 = mode 0 (basic consumer format)
+  //
+  // Byte 1 - category code (0x00 = general, 0x01 = CD, etc.)
+  //
+  // Byte 2 - source/channel numbers:
+  //   bits 0-3: source number (0 = unspecified)
+  //   bits 4-7: channel number (0 = unspecified)
+  //
+  // Byte 3 - sample frequency and clock accuracy:
+  //   bits 0-3: sample frequency code
+  //   bits 4-5: clock accuracy (00 = Level II, ±1000 ppm, appropriate for ESP32)
+  //   bits 6-7: reserved (0)
+  //
+  // Bytes 4-23 - reserved (zeros for basic compliance)
+
+  // Sample frequency codes stored in byte 3, bits 0-3
+  constexpr uint8_t FREQ_CODE_44100 = 0x0;          // 0000
+  constexpr uint8_t FREQ_CODE_48000 = 0x2;          // 0010
+  constexpr uint8_t FREQ_CODE_NOT_INDICATED = 0x1;  // 0001
+
+  // Start from an all-zero block. That alone gives the intended values for:
+  //   byte 0     - consumer, PCM audio, no copyright, no pre-emphasis, mode 0
+  //   byte 1     - category code 0x00 (general)
+  //   byte 2     - source/channel unspecified
+  //   bytes 4-23 - word length not specified, no original sample freq, etc.
+  // Only byte 3 receives an explicit value further down.
+  this->channel_status_.fill(0);
+
+  // Map the configured rate to its frequency code, falling back to "not indicated"
+  uint8_t freq_code = FREQ_CODE_NOT_INDICATED;
+  if (this->sample_rate_ == 44100) {
+    freq_code = FREQ_CODE_44100;
+  } else if (this->sample_rate_ == 48000) {
+    freq_code = FREQ_CODE_48000;
+  } else {
+    // Other values are possible but they're not supported by ESPHome.
+    // The channel status then carries the "not indicated" code
+    // and the unsupported rate is logged.
+    ESP_LOGW(TAG, "Unsupported sample rate %lu Hz, channel status will indicate 'not specified'",
+             (unsigned long) this->sample_rate_);
+  }
+
+  // Byte 3: the code occupies bits 0-3; writing it as the whole byte leaves the clock
+  // accuracy bits 4-5 at 00 (Level II, ±1000 ppm) and the reserved bits 6-7 at 0
+  this->channel_status_[3] = freq_code;
+}
+
+// Encode one little-endian 16-bit PCM sample as a BMC subframe (two uint32_t words) at the current block position.
+HOT void SPDIFEncoder::encode_sample_(const uint8_t *pcm_sample) {
+  // ============================================================================
+  // Build raw 32-bit subframe (IEC 60958 format)
+  // ============================================================================
+  // Bit layout:
+  //   Bits 0-3:   Preamble (handled separately, not in raw_subframe)
+  //   Bits 4-7:   Auxiliary audio data (zeros for 16-bit audio)
+  //   Bits 8-11:  Audio LSB extension (zeros for 16-bit audio)
+  //   Bits 12-27: 16-bit audio sample (MSB-aligned in 20-bit audio field)
+  //   Bit 28:     V (Validity) - 0 = valid audio
+  //   Bit 29:     U (User data) - 0
+  //   Bit 30:     C (Channel status) - from channel status block
+  //   Bit 31:     P (Parity) - even parity over bits 4-31
+  // ============================================================================
+
+  // Place 16-bit audio sample at bits 12-27 (little-endian input: [0]=LSB, [1]=MSB)
+  uint32_t raw_subframe = (static_cast<uint32_t>(pcm_sample[1]) << 20) | (static_cast<uint32_t>(pcm_sample[0]) << 12);
+
+  // V = 0 (valid audio), U = 0 (no user data)
+  // C = channel status bit for current frame (same bit used for both L and R subframes)
+  bool c_bit = this->get_channel_status_bit_(this->frame_in_block_);
+  if (c_bit) {
+    raw_subframe |= (1U << 30);
+  }
+
+  // Set P (bit 31) to the parity of bits 4-30 so that bits 4-31 contain an even number of ones.
+  // This ensures consistent BMC ending phase regardless of audio content
+  uint32_t bits_4_30 = (raw_subframe >> 4) & 0x07FFFFFF;  // 27 bits (4-30)
+  uint32_t ones_count = __builtin_popcount(bits_4_30);
+  uint32_t parity = ones_count & 1;  // 1 if odd count, 0 if even
+  raw_subframe |= parity << 31;      // Set P bit to make total even
+
+  // ============================================================================
+  // Select preamble based on position in block and channel
+  // ============================================================================
+  // B = block start (left channel, frame 0 of 192-frame block)
+  // M = left channel (frames 1-191)
+  // W = right channel (all frames)
+  uint8_t preamble;
+  if (this->is_left_channel_) {
+    preamble = (this->frame_in_block_ == 0) ? PREAMBLE_B : PREAMBLE_M;
+  } else {
+    preamble = PREAMBLE_W;
+  }
+
+  // ============================================================================
+  // BMC encode the data portion (bits 4-31) using lookup tables
+  // ============================================================================
+  // The 28 data bits become 56 BMC bits. Together with the 8-bit preamble they fill the two
+  // 32-bit output words of this subframe; the exact bit positions inside each word are listed
+  // where the words are assembled at the end of this function.
+  //
+  // word[0], in S/PDIF subframe order (preamble → aux → audio):
+  //   - preamble (8 BMC bits)
+  //   - BMC(subframe bits 4-7)   - first aux nibble (constant, always zero)
+  //   - BMC(subframe bits 8-11)  - second aux nibble (constant, always zero)
+  //   - BMC(subframe bits 12-15) - audio low nibble (4-bit LUT)
+  //
+  // word[1], in S/PDIF subframe order:
+  //   - BMC(subframe bits 16-23) - audio mid byte (8-bit LUT)
+  //   - BMC(subframe bits 24-31) - audio high nibble + VUCP (8-bit LUT)
+  // The LUTs assume a phase=true start; the XOR mask below handles a phase=false start.
+  // ============================================================================
+
+  // All preambles end at phase HIGH. Bits 4-11 are always zero for 16-bit audio: eight zero bits flip
+  // the phase eight times, i.e. back to HIGH, so bits 12-15 always start encoding at phase=true.
+
+  // Bits 12-15: 4-bit LUT lookup (always phase=true start)
+  uint32_t nibble = (raw_subframe >> 12) & 0xF;
+  uint32_t bmc_12_15 = BMC_LUT_4[nibble];
+
+  // Phase tracking via branchless XOR mask:
+  // - 0x0000 means phase=true (use LUT value directly)
+  // - 0xFFFF means phase=false (complement LUT value)
+  // End phase = start XOR (popcount & 1) since zero-bits flip phase,
+  // and for even bit widths: #zeros parity == popcount parity.
+  uint32_t phase_mask = -(__builtin_popcount(nibble) & 1u) & 0xFFFF;
+
+  // Bits 16-23: 8-bit LUT lookup with phase correction
+  uint32_t byte_mid = (raw_subframe >> 16) & 0xFF;
+  uint32_t bmc_16_23 = BMC_LUT_8[byte_mid] ^ phase_mask;
+  phase_mask ^= -(__builtin_popcount(byte_mid) & 1u) & 0xFFFF;
+
+  // Bits 24-31: 8-bit LUT lookup with phase correction
+  uint32_t byte_hi = (raw_subframe >> 24) & 0xFF;
+  uint32_t bmc_24_31 = BMC_LUT_8[byte_hi] ^ phase_mask;
+
+  // ============================================================================
+  // Combine with correct positioning for I2S transmission
+  // ============================================================================
+  // I2S with halfword swap: transmits bits 16-31, then bits 0-15.
+  // Within each halfword, MSB (highest bit) is transmitted first.
+  //
+  // For upper halfword (bits 16-31): bit 31 → bit 16
+  // For lower halfword (bits 0-15):  bit 15 → bit 0
+  //
+  // Desired S/PDIF order: preamble → bmc_4_7 → bmc_8_11 → bmc_12_15
+  //
+  // word[0] layout for correct transmission:
+  //   bits 24-31: preamble        (transmitted 1st, as MSB of upper halfword)
+  //   bits 16-23: BMC_ZERO_NIBBLE (transmitted 2nd, aux bits 4-7)
+  //   bits 8-15:  BMC_ZERO_NIBBLE (transmitted 3rd, aux bits 8-11)
+  //   bits 0-7:   bmc_12_15       (transmitted 4th, audio low nibble)
+  //
+  // word[1] layout:
+  //   bits 16-31: bmc_16_23 (transmitted 5th)
+  //   bits 0-15:  bmc_24_31 (transmitted 6th)
+  this->spdif_block_ptr_[0] =
+      bmc_12_15 | (BMC_ZERO_NIBBLE << 8) | (BMC_ZERO_NIBBLE << 16) | (static_cast<uint32_t>(preamble) << 24);
+  this->spdif_block_ptr_[1] = bmc_24_31 | (bmc_16_23 << 16);
+  this->spdif_block_ptr_ += 2;
+
+  // ============================================================================
+  // Update position tracking
+  // ============================================================================
+  if (!this->is_left_channel_) {
+    // Completed a stereo frame, advance frame counter
+    if (++this->frame_in_block_ >= SPDIF_BLOCK_SAMPLES) {
+      this->frame_in_block_ = 0;
+    }
+  }
+  this->is_left_channel_ = !this->is_left_channel_;
+}
+
+// Hand the block buffer to the callback that matches the current mode.
+// Returns ESP_ERR_INVALID_STATE when that callback is unset, otherwise the callback's result.
+esp_err_t SPDIFEncoder::send_block_(TickType_t ticks_to_wait) {
+  // Preload mode selects the preload callback/context pair;
+  // otherwise the write callback/context pair is used
+  const bool preloading = this->preload_mode_;
+  SPDIFBlockCallback sink = preloading ? this->preload_callback_ : this->write_callback_;
+  void *sink_ctx = preloading ? this->preload_callback_ctx_ : this->write_callback_ctx_;
+
+  // No callback registered for this mode: the block cannot be delivered
+  if (sink == nullptr) {
+    return ESP_ERR_INVALID_STATE;
+  }
+
+  // The callback always receives the whole block buffer
+  const esp_err_t result = sink(sink_ctx, this->spdif_block_buf_.get(), SPDIF_BLOCK_SIZE_BYTES, ticks_to_wait);
+  if (result != ESP_OK) {
+    // Keep the write pointer where it is so the block stays buffered after a failed send
+    return result;
+  }
+
+  // Delivered: rewind the write pointer for the next block. The frame and channel position
+  // tracking is left alone and continues from where it left off.
+  this->spdif_block_ptr_ = this->spdif_block_buf_.get();
+  return ESP_OK;
+}
+
+size_t SPDIFEncoder::get_pending_pcm_bytes() const {
+  const uint32_t *block_start = this->spdif_block_buf_.get();
+  if (block_start == nullptr || this->spdif_block_ptr_ == nullptr) {
+    return 0;  // no buffer or no write position: nothing can be pending
+  }
+  // Each 2-byte PCM sample is encoded into two uint32_t words, so half the
+  // number of words written is the sample count, and each sample is 2 bytes
+  const size_t words_written = this->spdif_block_ptr_ - block_start;
+  return (words_written / 2) * 2;
+}
+
+// Encode 16-bit PCM into S/PDIF blocks, sending every block as soon as it is complete.
+// Only whole samples are encoded: a trailing odd byte is never dereferenced, stays
+// unconsumed, and is excluded from the count reported through bytes_consumed.
+HOT esp_err_t SPDIFEncoder::write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent,
+                                  size_t *bytes_consumed) {
+  const uint8_t *pcm_data = src;
+  // Stop at the last whole sample; encode_sample_() always reads two bytes
+  const uint8_t *pcm_end = src + (size & ~static_cast<size_t>(1));
+  uint32_t block_count = 0;
+
+  // Publish progress through the optional out-parameters
+  auto report = [&]() {
+    if (blocks_sent != nullptr) {
+      *blocks_sent = block_count;
+    }
+    if (bytes_consumed != nullptr) {
+      *bytes_consumed = pcm_data - src;
+    }
+  };
+
+  // Send the block if it is complete; a failed send leaves it buffered for the next attempt
+  auto send_if_complete = [&]() -> esp_err_t {
+    if (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
+      return ESP_OK;
+    }
+    esp_err_t err = this->send_block_(ticks_to_wait);
+    if (err == ESP_OK) {
+      ++block_count;
+    }
+    return err;
+  };
+
+  while (pcm_data < pcm_end) {
+    // A complete block here was either just filled or is left over from a previous failed send
+    esp_err_t err = send_if_complete();
+    if (err != ESP_OK) {
+      report();
+      return err;
+    }
+
+    // Encode one 16-bit sample
+    this->encode_sample_(pcm_data);
+    pcm_data += 2;
+  }
+
+  // Send any complete block that was just finished
+  esp_err_t err = send_if_complete();
+  report();
+  return err;
+}
+
+// Complete a partially filled block with silence and send it; returns ESP_OK when nothing is buffered.
+esp_err_t SPDIFEncoder::flush_with_silence(TickType_t ticks_to_wait) {
+  // A block that is already complete is left over from a failed send: deliver it before padding
+  if (this->spdif_block_ptr_ >= &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
+    const esp_err_t retry_result = this->send_block_(ticks_to_wait);
+    if (retry_result != ESP_OK) {
+      return retry_result;
+    }
+  }
+
+  // Pad only when the buffer holds data; an empty buffer needs no flush
+  if (this->has_pending_data()) {
+    // Zero-valued samples fill the remainder of the block
+    static const uint8_t SILENCE[2] = {0, 0};
+    while (this->spdif_block_ptr_ < &this->spdif_block_buf_[SPDIF_BLOCK_SIZE_U32]) {
+      this->encode_sample_(SILENCE);
+    }
+    return this->send_block_(ticks_to_wait);
+  }
+
+  return ESP_OK;
+}
+
+}  // namespace esphome::i2s_audio
+
+#endif  // USE_ESP32 && USE_I2S_AUDIO_SPDIF_MODE
diff --git a/esphome/components/i2s_audio/speaker/spdif_encoder.h b/esphome/components/i2s_audio/speaker/spdif_encoder.h
new file mode 100644
index 0000000000..8516643432
--- /dev/null
+++ b/esphome/components/i2s_audio/speaker/spdif_encoder.h
@@ -0,0 +1,146 @@
+#pragma once
+
+#include "esphome/core/defines.h"
+
+#if defined(USE_ESP32) && defined(USE_I2S_AUDIO_SPDIF_MODE)
+
+#include <array>
+#include <cstdint>
+#include <memory>
+#include <freertos/FreeRTOS.h>
+#include "esp_err.h"
+#include "esphome/core/helpers.h"
+
+namespace esphome::i2s_audio {
+
+// One S/PDIF frame (left + right 32-bit subframes; called a "sample" in these names) is 64 bits
+static constexpr uint8_t SPDIF_BITS_PER_SAMPLE = 64;
+// Number of frames in one S/PDIF block
+static constexpr uint16_t SPDIF_BLOCK_SAMPLES = 192;
+// To emulate bi-phase mark code (BMC) (aka differential Manchester encoding) we send twice
+// as many bits per sample so that we can generate the transitions this encoding requires.
+static constexpr uint8_t EMULATED_BMC_BITS_PER_SAMPLE = SPDIF_BITS_PER_SAMPLE * 2;
+static constexpr uint16_t SPDIF_BLOCK_SIZE_BYTES = SPDIF_BLOCK_SAMPLES * (EMULATED_BMC_BITS_PER_SAMPLE / 8);
+static constexpr uint32_t SPDIF_BLOCK_SIZE_U32 = SPDIF_BLOCK_SIZE_BYTES / sizeof(uint32_t);  // 3072 bytes / 4 = 768
+// I2S frame count for one SPDIF block (for new driver where frame = 8 bytes for 32-bit stereo)
+static constexpr uint32_t SPDIF_BLOCK_I2S_FRAMES = SPDIF_BLOCK_SIZE_BYTES / 8;  // 3072 / 8 = 384 frames
+// PCM bytes needed for one complete SPDIF block (192 stereo frames * 2 bytes per sample * 2 channels)
+static constexpr uint16_t SPDIF_PCM_BYTES_PER_BLOCK = SPDIF_BLOCK_SAMPLES * 2 * 2;  // = 768 bytes
+
+/// Callback signature for block completion (raw function pointer for minimal overhead)
+/// @param user_ctx User context pointer passed during callback registration
+/// @param data Pointer to SPDIF encoded block data
+/// @param size Size of the block in bytes (always SPDIF_BLOCK_SIZE_BYTES)
+/// @param ticks_to_wait FreeRTOS ticks to wait for write completion
+/// @return ESP_OK on success, or an error code (the encoder then keeps the block and retries it on a later call)
+using SPDIFBlockCallback = esp_err_t (*)(void *user_ctx, uint32_t *data, size_t size, TickType_t ticks_to_wait);
+
+/// @brief Encodes 16-bit PCM into BMC-encoded S/PDIF blocks and hands each completed block to a callback.
+class SPDIFEncoder {
+ public:
+  /// @brief Initialize the SPDIF working buffer
+  /// @return true if setup was successful, false if allocation failed
+  bool setup();
+
+  /// @brief Set callback for normal writes (used when channel is running)
+  /// @param callback Function pointer to call when a block is ready
+  /// @param user_ctx Context pointer passed to callback (typically 'this' pointer of speaker)
+  void set_write_callback(SPDIFBlockCallback callback, void *user_ctx) {
+    this->write_callback_ = callback;
+    this->write_callback_ctx_ = user_ctx;
+  }
+
+  /// @brief Set callback for preload writes (used when preloading to DMA before enabling channel)
+  /// @param callback Function pointer to call when a block is ready for preload
+  /// @param user_ctx Context pointer passed to callback (typically 'this' pointer of speaker)
+  void set_preload_callback(SPDIFBlockCallback callback, void *user_ctx) {
+    this->preload_callback_ = callback;
+    this->preload_callback_ctx_ = user_ctx;
+  }
+
+  /// @brief Enable or disable preload mode
+  /// When in preload mode, completed blocks use the preload callback instead of write callback
+  void set_preload_mode(bool preload) { this->preload_mode_ = preload; }
+
+  /// @brief Check if currently in preload mode
+  bool is_preload_mode() const { return this->preload_mode_; }
+
+  /// @brief Convert PCM audio data to SPDIF BMC encoded data
+  /// @param src Source PCM audio data (little-endian 16-bit samples, alternating left/right)
+  /// @param size Size of source data in bytes (expected to be a whole number of 16-bit samples)
+  /// @param ticks_to_wait Timeout for blocking writes
+  /// @param blocks_sent Optional pointer to receive the number of complete SPDIF blocks sent
+  /// @param bytes_consumed Optional pointer to receive the number of PCM bytes consumed from src
+  /// @return esp_err_t as returned from the callback
+  esp_err_t write(const uint8_t *src, size_t size, TickType_t ticks_to_wait, uint32_t *blocks_sent = nullptr,
+                  size_t *bytes_consumed = nullptr);
+
+  /// @brief Get the number of PCM bytes currently held in the block buffer
+  /// @return Number of pending PCM bytes (0 to SPDIF_PCM_BYTES_PER_BLOCK; the maximum only after a failed block send)
+  size_t get_pending_pcm_bytes() const;
+
+  /// @brief Get the number of PCM frames currently held in the block buffer
+  /// @return Number of pending PCM frames (0 to SPDIF_BLOCK_SAMPLES; the maximum only after a failed block send)
+  uint32_t get_pending_frames() const { return this->get_pending_pcm_bytes() / 4; }
+
+  /// @brief Check if the block buffer holds any encoded data (a partial block, or a full block whose send failed)
+  bool has_pending_data() const { return this->spdif_block_ptr_ != this->spdif_block_buf_.get(); }
+
+  /// @brief Flush any pending partial block by padding with silence and sending
+  /// @param ticks_to_wait Timeout for blocking writes
+  /// @return esp_err_t as returned from the callback, or ESP_OK if nothing to flush
+  esp_err_t flush_with_silence(TickType_t ticks_to_wait);
+
+  /// @brief Reset the SPDIF block buffer and position tracking, discarding any partial block
+  void reset();
+
+  /// @brief Set the sample rate for Channel Status Block encoding
+  /// @param sample_rate Sample rate in Hz; 44100 and 48000 get a frequency code, other rates are logged as unsupported
+  /// Call this before writing audio data to ensure correct channel status.
+  void set_sample_rate(uint32_t sample_rate);
+
+  /// @brief Get the currently configured sample rate
+  uint32_t get_sample_rate() const { return this->sample_rate_; }
+
+ protected:
+  /// @brief Encode a single 16-bit PCM sample into the current block position
+  HOT void encode_sample_(const uint8_t *pcm_sample);
+
+  /// @brief Send the completed block via the appropriate callback
+  esp_err_t send_block_(TickType_t ticks_to_wait);
+
+  /// @brief Build the channel status block from current configuration
+  void build_channel_status_();
+
+  /// @brief Get the channel status bit for a specific frame
+  /// @param frame Frame number (0-191)
+  /// @return The C bit value for this frame
+  ESPHOME_ALWAYS_INLINE inline bool get_channel_status_bit_(uint8_t frame) const {
+    // Channel status is 192 bits transmitted over 192 frames
+    // Bit N is transmitted in frame N, LSB-first within each byte
+    return (this->channel_status_[frame >> 3] >> (frame & 7)) & 1;
+  }
+
+  // Member ordering optimized to minimize padding (largest alignment first)
+
+  // 4-byte aligned members (pointers and uint32_t)
+  SPDIFBlockCallback write_callback_{nullptr};
+  SPDIFBlockCallback preload_callback_{nullptr};
+  void *write_callback_ctx_{nullptr};
+  void *preload_callback_ctx_{nullptr};
+  std::unique_ptr<uint32_t[]> spdif_block_buf_;  // Working buffer for SPDIF block (heap allocated)
+  uint32_t *spdif_block_ptr_{nullptr};           // Current position in block buffer
+  uint32_t sample_rate_{48000};                  // Sample rate for Channel Status Block encoding
+
+  // 1-byte aligned members (grouped together to avoid internal padding)
+  uint8_t frame_in_block_{0};   // 0-191, tracks stereo frame position within block
+  bool is_left_channel_{true};  // Alternates L/R for stereo samples
+  bool preload_mode_{false};    // Whether to use preload callback vs write callback
+
+  // Channel status block (192 bits = 24 bytes, one bit per frame); last because std::array<uint8_t> is 1-byte aligned
+  std::array<uint8_t, 24> channel_status_{};
+};
+
+}  // namespace esphome::i2s_audio
+
+#endif  // USE_ESP32 && USE_I2S_AUDIO_SPDIF_MODE
diff --git a/esphome/core/defines.h b/esphome/core/defines.h
index 85454d3cc0..162a6034b8 100644
--- a/esphome/core/defines.h
+++ b/esphome/core/defines.h
@@ -72,6 +72,7 @@
 #define USE_GRAPHICAL_DISPLAY_MENU
 #define USE_HOMEASSISTANT_TIME
 #define USE_HTTP_REQUEST_OTA_WATCHDOG_TIMEOUT 8000  // NOLINT
+#define USE_I2S_AUDIO_SPDIF_MODE
 #define USE_IMAGE
 #define USE_IMPROV_SERIAL
 #define USE_IMPROV_SERIAL_NEXT_URL
diff --git a/tests/components/speaker/spdif_mode.esp32-idf.yaml b/tests/components/speaker/spdif_mode.esp32-idf.yaml
new file mode 100644
index 0000000000..4d6859feae
--- /dev/null
+++ b/tests/components/speaker/spdif_mode.esp32-idf.yaml
@@ -0,0 +1,25 @@
+substitutions:
+  i2s_bclk_pin: GPIO27
+  i2s_lrclk_pin: GPIO26
+  i2s_mclk_pin: GPIO25
+  i2s_dout_pin: GPIO12
+  spdif_data_pin: GPIO4  # the only substitution referenced in this file
+
+packages:
+  i2c: !include ../../test_build_components/common/i2c/esp32-idf.yaml
+
+i2s_audio:
+  - id: i2s_output  # no clock pins configured
+
+speaker:
+  - platform: i2s_audio
+    id: speaker_id
+    dac_type: external
+    i2s_dout_pin: ${spdif_data_pin}
+    spdif_mode: true  # option exercised by this test
+    use_apll: true
+    timeout: 2s
+    sample_rate: 48000
+    bits_per_sample: 16bit
+    channel: stereo
+    i2s_mode: primary