[store_yaml] Refactor YAML discovery to share bundle.py's approach

The previous implementation extended `track_yaml_loads` across the entire
validation pass to catch deferred `!include` and package loads, but that also
captured framework YAML loaded internally by component validators (e.g.
LVGL's `hello_world.yaml`) and produced spurious "unresolved substitution"
warnings during the pre-validation force-load. bundle.py already had the
right pattern: a fresh post-validation re-parse plus `force_load_include_files`.

- Lift the discovery into `yaml_util.discover_user_yaml_files` and have both
  `bundle.py` and `config.py` use it (DRY).
- Capture `secrets.yaml` / `secrets.yml` by the *un-resolved* listener fname
  so a `secrets.yaml` symlinked to a non-secrets-named target is still
  flagged for redaction.
- Add a `warn_on_unresolved` flag to `force_load_include_files` so the
  discovery path (where substitutions haven't run) logs at debug instead of
  warning.
- Reject `store_yaml` configs with an unencrypted API via
  `FINAL_VALIDATE_SCHEMA`; an explicit `allow_unencrypted: true` opt-out
  keeps lab setups working (renamed from `allow_unencrypted_api` so the
  integration-test harness's naive `api:` string replacement doesn't
  clobber it).
- Annotate `store_yaml_chunk_buf` with the
  `cppcoreguidelines-avoid-non-const-global-variables` suppression that
  matches other intentional API-side globals.
- Update unit tests to exercise the new `DiscoveredYamlFiles` shape plus a
  symlink-secrets case.
This commit is contained in:
J. Nick Koston
2026-05-15 06:44:46 -07:00
parent 6493fdaba1
commit f06e96685b
6 changed files with 107 additions and 54 deletions

View File

@@ -1175,7 +1175,11 @@ void APIConnection::on_camera_image_request(const CameraImageRequest &msg) {
// Chunk size per GetYamlResponse. Small enough to leave room for the protobuf frame
// inside the 65535-byte API limit and friendly to TCP MSS.
static constexpr size_t STORE_YAML_CHUNK_SIZE = 512;
// Scratch buffer used to copy a chunk from PROGMEM (needed on ESP8266; harmless elsewhere).
// Scratch buffer used to copy a chunk from PROGMEM (needed on ESP8266; harmless
// elsewhere). Shared across connections is safe because the API loop is
// single-threaded and each chunk is filled and consumed atomically inside one
// `try_send_store_yaml_` iteration.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
static uint8_t store_yaml_chunk_buf[STORE_YAML_CHUNK_SIZE];
void APIConnection::on_get_yaml_request() {

View File

@@ -8,8 +8,9 @@ from types import ModuleType
import esphome.codegen as cg
import esphome.config_validation as cv
from esphome.const import CONF_ID, CONF_RAW_DATA_ID, SECRETS_FILES
from esphome.const import CONF_API, CONF_ID, CONF_RAW_DATA_ID
from esphome.core import CORE, EsphomeError, HexInt
import esphome.final_validate as fv
_LOGGER = logging.getLogger(__name__)
@@ -17,6 +18,10 @@ CODEOWNERS = ["@bdraco"]
DEPENDENCIES = ["api"]
CONF_INCLUDE_SECRETS = "include_secrets"
# Avoid an `_api:` substring in the key name so the integration-test harness
# (which naively str-replaces `api:` to inject a port directive) doesn't
# clobber configs that opt into this escape hatch.
CONF_ALLOW_UNENCRYPTED = "allow_unencrypted"
store_yaml_ns = cg.esphome_ns.namespace("store_yaml")
StoreYamlComponent = store_yaml_ns.class_("StoreYamlComponent", cg.Component)
@@ -33,10 +38,38 @@ CONFIG_SCHEMA = cv.Schema(
cv.GenerateID(): cv.declare_id(StoreYamlComponent),
cv.GenerateID(CONF_RAW_DATA_ID): cv.declare_id(cg.uint8),
cv.Optional(CONF_INCLUDE_SECRETS, default=False): cv.boolean,
cv.Optional(CONF_ALLOW_UNENCRYPTED, default=False): cv.boolean,
}
).extend(cv.COMPONENT_SCHEMA)
def _final_validate(config):
"""Require API encryption: an unauthenticated client could otherwise pull
the embedded YAML (which may include Wi-Fi credentials or opted-in
secrets). The escape hatch ``allow_unencrypted_api: true`` exists for
isolated lab setups where the user has accepted the trade-off."""
full = fv.full_config.get()
api_conf = full.get(CONF_API, {})
if api_conf.get("encryption"):
return config
if config.get(CONF_ALLOW_UNENCRYPTED):
_LOGGER.warning(
"store_yaml is enabled without API encryption; any client that can "
"reach the device on the network can pull the embedded YAML."
)
return config
raise cv.Invalid(
"store_yaml requires API encryption (configure `api.encryption.key`). "
"Without encryption, the embedded YAML — which may contain Wi-Fi "
"credentials or opted-in secrets — can be read by any client that "
"reaches the device. Set `store_yaml.allow_unencrypted: true` to "
"override after acknowledging the risk."
)
FINAL_VALIDATE_SCHEMA = _final_validate
def _import_zstd() -> ModuleType:
try:
from compression import zstd # noqa: PLC0415 — Python 3.14+ stdlib
@@ -53,8 +86,8 @@ def _import_zstd() -> ModuleType:
def _gather_files(include_secrets: bool) -> list[tuple[str, bytes]]:
"""Read each YAML file the config loader touched, return (relative_path, content) pairs."""
sources = CORE.data.get("yaml_sources")
if not sources:
discovered = CORE.data.get("yaml_sources")
if not discovered or not discovered.files:
raise EsphomeError(
"store_yaml could not find any tracked YAML files; the config loader "
"did not populate CORE.data['yaml_sources']."
@@ -62,19 +95,14 @@ def _gather_files(include_secrets: bool) -> list[tuple[str, bytes]]:
config_path = Path(CORE.config_path).resolve()
root = config_path.parent
secret_paths = discovered.secrets
seen: set[Path] = set()
files: list[tuple[str, bytes]] = []
# ``track_yaml_loads`` already returns resolved Path objects, so no extra
# ``.resolve()`` work is needed here. The symlink edge case (``secrets.yaml``
# symlinked to a non-secrets filename) is not detectable at this layer
# because the original path is lost by the resolver upstream.
for path in sources:
if path in seen:
continue
seen.add(path)
if path.name in SECRETS_FILES and not include_secrets:
for path in discovered.files:
# `secret_paths` was collected from the *un-resolved* basename, so a
# `secrets.yaml` symlinked to a differently-named target is still
# treated as secrets here.
if path in secret_paths and not include_secrets:
content = REDACTED_PLACEHOLDER
else:
try:

View File

@@ -1191,33 +1191,28 @@ def _load_config(
command_line_substitutions: dict[str, Any], skip_external_update: bool = False
) -> Config:
"""Load the configuration file."""
# Keep the file-load listener active across both the YAML parse and the
# validation pass. Substitution and packages resolve deferred `!include`
# references during validation (and remote packages download YAML on
# demand), so the listener must still be installed when those secondary
# loads happen. Components that want the on-disk YAML at codegen time
# (e.g. store_yaml for firmware recovery) read the list out of
# CORE.data["yaml_sources"] after a successful validation.
with yaml_util.track_yaml_loads() as loaded_files:
try:
config = yaml_util.load_yaml(CORE.config_path)
# Resolve any deferred `!include`/package references whose paths
# don't depend on substitutions, so they're captured here too.
yaml_util.force_load_include_files(config)
except EsphomeError as e:
raise InvalidYAMLError(e) from e
try:
config = yaml_util.load_yaml(CORE.config_path)
except EsphomeError as e:
raise InvalidYAMLError(e) from e
try:
result = validate_config(
config, command_line_substitutions, skip_external_update
)
except EsphomeError:
raise
except Exception:
_LOGGER.error("Unexpected exception while reading configuration:")
raise
try:
result = validate_config(
config, command_line_substitutions, skip_external_update
)
except EsphomeError:
raise
except Exception:
_LOGGER.error("Unexpected exception while reading configuration:")
raise
CORE.data["yaml_sources"] = loaded_files
# Discover the user's on-disk YAML files via a fresh re-parse — same
# pattern bundle.py uses. Doing it post-validation (rather than keeping a
# listener installed across validation) avoids capturing framework YAML
# that components load internally (e.g. LVGL's `hello_world.yaml`). The
# result is consumed by components like store_yaml that want to embed the
# user's configuration in firmware for recovery.
CORE.data["yaml_sources"] = yaml_util.discover_user_yaml_files(CORE.config_path)
return result

View File

@@ -1,3 +1,4 @@
api:
store_yaml:
allow_unencrypted: true

View File

@@ -11,3 +11,4 @@ logger:
api:
store_yaml:
allow_unencrypted: true

View File

@@ -14,6 +14,7 @@ from esphome.components.store_yaml import (
_pack_envelope,
)
from esphome.core import CORE, EsphomeError
from esphome.yaml_util import DiscoveredYamlFiles
def _unpack_envelope(blob: bytes) -> dict[str, bytes]:
@@ -57,13 +58,21 @@ def _reset_core() -> None:
CORE.config_path = None
def _set_sources(project_dir: Path, *names: str) -> None:
def _set_sources(project_dir: Path, *names: str, secrets: tuple[str, ...] = ()) -> None:
CORE.config_path = project_dir / "entry.yaml"
CORE.data["yaml_sources"] = [project_dir / name for name in names]
files = [project_dir / name for name in names]
secret_paths = {(project_dir / name).resolve() for name in secrets}
CORE.data["yaml_sources"] = DiscoveredYamlFiles(files, secret_paths)
def test_gather_redacts_secrets_by_default(project: Path) -> None:
_set_sources(project, "entry.yaml", "wifi.yaml", "secrets.yaml")
_set_sources(
project,
"entry.yaml",
"wifi.yaml",
"secrets.yaml",
secrets=("secrets.yaml",),
)
files = dict(_gather_files(include_secrets=False))
assert files["secrets.yaml"] == REDACTED_PLACEHOLDER
assert b"SUPER_SECRET" not in files["secrets.yaml"]
@@ -73,13 +82,33 @@ def test_gather_redacts_secrets_by_default(project: Path) -> None:
def test_gather_redacts_yml_extension(project: Path) -> None:
yml = project / "secrets.yml"
yml.write_text("api_key: OTHER_SECRET\n")
_set_sources(project, "entry.yaml", "secrets.yml")
_set_sources(project, "entry.yaml", "secrets.yml", secrets=("secrets.yml",))
files = dict(_gather_files(include_secrets=False))
assert files["secrets.yml"] == REDACTED_PLACEHOLDER
def test_gather_redacts_secret_symlinked_to_other_name(
project: Path, tmp_path: Path
) -> None:
"""A `secrets.yaml` symlinked to a non-secrets-named target is still redacted
because the un-resolved basename was captured upstream."""
target = tmp_path / "actual_creds.yaml"
target.write_text("api_key: FROM_SYMLINK\n")
link = project / "secrets.yaml"
link.unlink() # remove the regular file laid down by the fixture
link.symlink_to(target)
# Discovery records the un-resolved listener fname under SECRETS_FILES
# but stores the resolved path; mimic that here.
resolved = link.resolve()
CORE.config_path = project / "entry.yaml"
CORE.data["yaml_sources"] = DiscoveredYamlFiles([resolved], {resolved})
files = dict(_gather_files(include_secrets=False))
assert REDACTED_PLACEHOLDER in files.values()
assert b"FROM_SYMLINK" not in b"".join(files.values())
def test_gather_embeds_secrets_when_opted_in(project: Path) -> None:
_set_sources(project, "entry.yaml", "secrets.yaml")
_set_sources(project, "entry.yaml", "secrets.yaml", secrets=("secrets.yaml",))
files = dict(_gather_files(include_secrets=True))
assert b"SUPER_SECRET" in files["secrets.yaml"]
@@ -90,21 +119,16 @@ def test_gather_uses_relative_path_for_external_files(
"""Files outside the project root use a ``..``-style relative path so they don't collide."""
sibling = tmp_path / "outside.yaml"
sibling.write_text("foo: bar\n")
_set_sources(project, "entry.yaml")
CORE.data["yaml_sources"].append(sibling)
CORE.config_path = project / "entry.yaml"
CORE.data["yaml_sources"] = DiscoveredYamlFiles(
[project / "entry.yaml", sibling], set()
)
files = dict(_gather_files(include_secrets=False))
# project root is `tmp_path/project`, sibling is in `tmp_path` so it
# resolves to `../outside.yaml`.
assert "../outside.yaml" in files
def test_gather_deduplicates(project: Path) -> None:
_set_sources(project, "entry.yaml", "wifi.yaml", "wifi.yaml")
files = _gather_files(include_secrets=False)
paths = [p for p, _ in files]
assert paths.count("wifi.yaml") == 1
def test_gather_raises_when_no_sources(project: Path) -> None:
CORE.config_path = project / "entry.yaml"
with pytest.raises(EsphomeError):