[yaml_util] Promote include-discovery helper, share it with bundle (#16447)

This commit is contained in:
J. Nick Koston
2026-05-15 10:17:50 -07:00
committed by GitHub
parent 8b3bc47547
commit ec1826a6ed
4 changed files with 359 additions and 91 deletions

View File

@@ -260,42 +260,20 @@ class ConfigBundleCreator:
def _discover_yaml_includes(self) -> None:
"""Discover YAML files loaded during config parsing.
Deliberately uses a fresh re-parse and force-loads every deferred
``IncludeFile`` to include *all* potentially-reachable includes,
even branches not selected by the local substitutions. Bundles are
meant to be compiled on another system where command-line
substitution overrides may choose a different branch — e.g.
``!include network/${eth_model}/config.yaml`` must ship every
candidate so the remote build can pick any one.
Entries with unresolved substitution variables in the filename
path are skipped with a warning (they cannot be resolved without
the substitution pass).
Secrets files are tracked separately so we can filter them to
only include the keys this config actually references.
Delegates to :func:`yaml_util.discover_user_yaml_files`, which does a
fresh re-parse and force-loads every deferred ``IncludeFile`` so that
*all* potentially-reachable includes are captured (even branches not
selected by local substitutions). Bundles are meant to be compiled on
another system where command-line substitution overrides may choose a
different branch — e.g. ``!include network/${eth_model}/config.yaml``
must ship every candidate so the remote build can pick any one.
"""
# Must be a fresh parse: IncludeFile.load() caches its result in
# _content, and we discover files by listening for loader calls. On
# an already-parsed tree the cache is populated, .load() returns
# without calling the loader, the listener never fires, and the
# referenced files would be silently dropped from the bundle.
with yaml_util.track_yaml_loads() as loaded_files:
try:
data = yaml_util.load_yaml(self._config_path)
except EsphomeError:
_LOGGER.debug(
"Bundle: re-loading YAML for include discovery failed, "
"proceeding with partial file list"
)
else:
_force_load_include_files(data)
for fpath in loaded_files:
if fpath == self._config_path.resolve():
discovered = yaml_util.discover_user_yaml_files(self._config_path)
self._secrets_paths.update(discovered.secrets)
config_resolved = self._config_path.resolve()
for fpath in discovered.files:
if fpath == config_resolved:
continue # Already added as config
if fpath.name in const.SECRETS_FILES:
self._secrets_paths.add(fpath)
self._add_file(fpath)
def _discover_component_files(self) -> None:
@@ -625,57 +603,6 @@ def _add_bytes_to_tar(tar: tarfile.TarFile, name: str, data: bytes) -> None:
tar.addfile(info, io.BytesIO(data))
def _force_load_include_files(obj: Any, _seen: set[int] | None = None) -> None:
"""Recursively resolve any ``IncludeFile`` instances in a YAML tree.
Nested ``!include`` returns a deferred ``IncludeFile`` that is only
resolved during the substitution pass. During bundle discovery we need
the referenced files to actually load so the ``track_yaml_loads``
listener fires for them.
``IncludeFile`` instances with unresolved substitution variables in the
filename cannot be loaded — we skip and warn about those.
"""
if _seen is None:
_seen = set()
if isinstance(obj, yaml_util.IncludeFile):
if id(obj) in _seen:
return
_seen.add(id(obj))
if obj.has_unresolved_expressions():
_LOGGER.warning(
"Bundle: cannot resolve !include %s (referenced from %s) "
"with substitutions in path",
obj.file,
obj.parent_file,
)
return
try:
loaded = obj.load()
except EsphomeError as err:
_LOGGER.warning(
"Bundle: failed to load !include %s (referenced from %s): %s",
obj.file,
obj.parent_file,
err,
)
return
_force_load_include_files(loaded, _seen)
elif isinstance(obj, dict):
if id(obj) in _seen:
return
_seen.add(id(obj))
for value in obj.values():
_force_load_include_files(value, _seen)
elif isinstance(obj, (list, tuple)):
if id(obj) in _seen:
return
_seen.add(id(obj))
for item in obj:
_force_load_include_files(item, _seen)
def _resolve_include_path(include_path: Any) -> Path | None:
"""Resolve an include path to absolute, skipping system includes."""
if isinstance(include_path, str) and include_path.startswith("<"):

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from collections.abc import Callable, Generator
from contextlib import contextmanager, suppress
from dataclasses import dataclass, field
import functools
import inspect
from io import BytesIO, TextIOBase, TextIOWrapper
@@ -233,6 +234,130 @@ class IncludeFile:
return has_substitution_or_expression(str(self.file))
def force_load_include_files(
obj: Any,
*,
warn_on_unresolved: bool = True,
_seen: set[int] | None = None,
) -> None:
"""Recursively resolve any deferred ``IncludeFile`` instances in a YAML tree.
Nested ``!include`` returns a deferred ``IncludeFile`` that is only resolved
later (substitution / packages pass). Callers that need every referenced
file to actually load — bundle discovery, on-device YAML recovery — invoke
this while a :func:`track_yaml_loads` listener is active so the underlying
loader fires and records every reachable file.
``IncludeFile`` instances whose path contains unresolved substitution
variables cannot be loaded. By default a warning is logged for each one;
pass ``warn_on_unresolved=False`` (used by discovery paths that run on a
fresh re-parse where substitutions haven't been applied yet) to demote it
to a debug log.
"""
if _seen is None:
_seen = set()
if isinstance(obj, IncludeFile):
if id(obj) in _seen:
return
_seen.add(id(obj))
if obj.has_unresolved_expressions():
log = _LOGGER.warning if warn_on_unresolved else _LOGGER.debug
log(
"Cannot resolve !include %s (referenced from %s) with substitutions in path",
obj.file,
obj.parent_file,
)
return
try:
loaded = obj.load()
except EsphomeError as err:
_LOGGER.warning(
"Failed to load !include %s (referenced from %s): %s",
obj.file,
obj.parent_file,
err,
)
return
force_load_include_files(
loaded, warn_on_unresolved=warn_on_unresolved, _seen=_seen
)
elif isinstance(obj, dict):
if id(obj) in _seen:
return
_seen.add(id(obj))
for value in obj.values():
force_load_include_files(
value, warn_on_unresolved=warn_on_unresolved, _seen=_seen
)
elif isinstance(obj, (list, tuple)):
if id(obj) in _seen:
return
_seen.add(id(obj))
for item in obj:
force_load_include_files(
item, warn_on_unresolved=warn_on_unresolved, _seen=_seen
)
@dataclass(slots=True)
class DiscoveredYamlFiles:
"""Result of :func:`discover_user_yaml_files`.
``files`` contains every resolved path the YAML loader touched while we
were re-parsing the user's config; ``secrets`` is the subset whose
*un-resolved* filename matched :data:`esphome.const.SECRETS_FILES` (so
a ``secrets.yaml`` symlinked to a differently-named target is still
flagged as secrets).
"""
files: list[Path] = field(default_factory=list)
secrets: set[Path] = field(default_factory=set)
def discover_user_yaml_files(config_path: Path) -> DiscoveredYamlFiles:
"""Fresh-re-parse ``config_path`` and report every file the YAML loader
pulled in, plus which of them came in under a secrets filename.
Does NOT run schema validation, substitutions, or package resolution — so
component-internal YAML loaded by validators (LVGL helpers, dashboard
imports, etc.) is *not* captured. Deferred ``!include`` references whose
paths don't depend on substitutions are force-loaded here so they're
captured too.
Must run on a fresh parse because :meth:`IncludeFile.load` caches its
result; on an already-resolved tree :meth:`load` returns without invoking
the loader and the listener would not fire for the referenced files.
"""
from esphome.const import SECRETS_FILES
secrets: set[Path] = set()
def _capture_secret(fname: Path) -> None:
if Path(fname).name in SECRETS_FILES:
secrets.add(Path(fname).resolve())
with track_yaml_loads() as loaded:
_load_listeners.append(_capture_secret)
try:
try:
data = load_yaml(config_path)
except EsphomeError:
return DiscoveredYamlFiles(list(loaded), secrets)
force_load_include_files(data, warn_on_unresolved=False)
finally:
_load_listeners.remove(_capture_secret)
# Deduplicate while preserving first-seen order.
seen: set[Path] = set()
unique: list[Path] = []
for path in loaded:
if path not in seen:
seen.add(path)
unique.append(path)
return DiscoveredYamlFiles(unique, secrets)
def _add_data_ref(fn):
@functools.wraps(fn)
def wrapped(loader, node):

View File

@@ -22,13 +22,13 @@ from esphome.bundle import (
_add_bytes_to_tar,
_default_target_dir,
_find_used_secret_keys,
_force_load_include_files,
extract_bundle,
is_bundle_path,
prepare_bundle_for_compile,
read_bundle_manifest,
)
from esphome.core import CORE, EsphomeError
from esphome.yaml_util import force_load_include_files
# ---------------------------------------------------------------------------
# Helpers
@@ -947,7 +947,7 @@ def test_discover_files_nested_include_load_failure(
paths = [f.path for f in files]
assert "test.yaml" in paths
assert any(
"failed to load !include" in r.message and "missing.yaml" in r.message
"failed to load !include" in r.message.lower() and "missing.yaml" in r.message
for r in caplog.records
)
@@ -974,8 +974,8 @@ def test_force_load_skips_duplicate_include_file() -> None:
# Same instance appears twice — second visit must hit the _seen guard.
tree = {"a": stub, "b": [stub]}
with patch("esphome.bundle.yaml_util.IncludeFile", _StubInclude):
_force_load_include_files(tree)
with patch("esphome.yaml_util.IncludeFile", _StubInclude):
force_load_include_files(tree)
assert stub.load_calls == 1
@@ -989,8 +989,8 @@ def test_force_load_handles_cyclic_containers() -> None:
cyclic_list.append(cyclic_list)
# Should return without recursing forever
_force_load_include_files(cyclic_dict)
_force_load_include_files(cyclic_list)
force_load_include_files(cyclic_dict)
force_load_include_files(cyclic_list)
def test_discover_files_yaml_reload_failure(

View File

@@ -12,11 +12,15 @@ import esphome.config_validation as cv
from esphome.core import DocumentLocation, DocumentRange, EsphomeError
from esphome.util import OrderedDict
from esphome.yaml_util import (
DiscoveredYamlFiles,
ESPHomeDataBase,
ESPLiteralValue,
discover_user_yaml_files,
force_load_include_files,
format_path,
make_data_base,
make_literal,
track_yaml_loads,
)
@@ -966,3 +970,215 @@ def test_make_literal_blocks_substitution() -> None:
# undefined in the context.
assert result == {"pin": "${PIN}"}
assert isinstance(result, ESPLiteralValue)
# ---------------------------------------------------------------------------
# force_load_include_files / discover_user_yaml_files
# ---------------------------------------------------------------------------
class _StubInclude:
"""Stand-in for `IncludeFile` that records how `load()` was called.
Patched in via `esphome.yaml_util.IncludeFile` so the recursion in
`force_load_include_files` treats instances as deferred includes without
needing an actual on-disk file.
"""
def __init__(
self,
file: str = "stub.yaml",
parent_file: Path | None = None,
*,
unresolved: bool = False,
load_result: object = None,
raise_on_load: EsphomeError | None = None,
) -> None:
self.file = Path(file)
self.parent_file = parent_file or Path("/tmp/parent.yaml")
self._unresolved = unresolved
self._load_result = load_result if load_result is not None else {}
self._raise = raise_on_load
self.load_calls = 0
def has_unresolved_expressions(self) -> bool:
return self._unresolved
def load(self) -> object:
self.load_calls += 1
if self._raise is not None:
raise self._raise
return self._load_result
@pytest.fixture
def patch_include_file():
"""Replace `IncludeFile` with `_StubInclude` so isinstance checks in
`force_load_include_files` match the stubs constructed by tests."""
with patch("esphome.yaml_util.IncludeFile", _StubInclude):
yield
def test_force_load_include_files_resolves_nested_includes(
patch_include_file: None,
) -> None:
"""A tree of dict/list/IncludeFile is walked and every IncludeFile is loaded."""
inner = _StubInclude("inner.yaml")
outer = _StubInclude("outer.yaml", load_result={"nested": inner})
force_load_include_files([{"a": outer}, "scalar"])
assert outer.load_calls == 1
assert inner.load_calls == 1
def test_force_load_include_files_seen_guard_prevents_double_load(
patch_include_file: None,
) -> None:
"""The same IncludeFile referenced from two branches loads once."""
stub = _StubInclude("once.yaml")
force_load_include_files({"a": stub, "b": [stub]})
assert stub.load_calls == 1
def test_force_load_include_files_handles_cyclic_containers() -> None:
"""Cyclic dict/list references don't trigger infinite recursion."""
cyclic_dict: dict[str, object] = {}
cyclic_dict["self"] = cyclic_dict
cyclic_list: list[object] = []
cyclic_list.append(cyclic_list)
# Both calls must return without recursing forever.
force_load_include_files(cyclic_dict)
force_load_include_files(cyclic_list)
@pytest.mark.parametrize(
("warn_on_unresolved", "expect_level"),
[
pytest.param(True, "WARNING", id="default-warns"),
pytest.param(False, "DEBUG", id="opt-in-demotes"),
],
)
def test_force_load_include_files_unresolved_log_level(
patch_include_file: None,
caplog: pytest.LogCaptureFixture,
warn_on_unresolved: bool,
expect_level: str,
) -> None:
"""Substitution-templated include paths skip the load and log at the
level chosen by `warn_on_unresolved`."""
stub = _StubInclude("${var}.yaml", unresolved=True)
with caplog.at_level("DEBUG", logger="esphome.yaml_util"):
force_load_include_files({"k": stub}, warn_on_unresolved=warn_on_unresolved)
assert stub.load_calls == 0
matching = [
r.levelname for r in caplog.records if "Cannot resolve !include" in r.message
]
assert matching == [expect_level]
def test_force_load_include_files_warns_on_load_failure(
patch_include_file: None,
caplog: pytest.LogCaptureFixture,
) -> None:
"""An `EsphomeError` raised by `load()` is caught and logged, not propagated."""
stub = _StubInclude("missing.yaml", raise_on_load=EsphomeError("boom"))
with caplog.at_level("WARNING", logger="esphome.yaml_util"):
force_load_include_files({"k": stub})
assert any(
"Failed to load !include" in r.message and "missing.yaml" in r.message
for r in caplog.records
)
def test_discovered_yaml_files_holds_files_and_secrets() -> None:
"""`DiscoveredYamlFiles` is a small data carrier; both fields are mandatory."""
files = [Path("/tmp/a.yaml")]
secrets = {Path("/tmp/a.yaml")}
discovered = DiscoveredYamlFiles(files, secrets)
assert discovered.files is files
assert discovered.secrets is secrets
def _write(tmp_path: Path, name: str, content: str) -> Path:
"""Write `content` to `tmp_path/name`, creating parent dirs as needed."""
path = tmp_path / name
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content)
return path
def _write_entry_including(tmp_path: Path, included_name: str) -> Path:
"""Write a minimal entry yaml that `!include`s `included_name`."""
return _write(
tmp_path,
"entry.yaml",
f"esphome:\n name: test\nwifi: !include {included_name}\n",
)
def test_discover_user_yaml_files_captures_includes(tmp_path: Path) -> None:
"""A `!include` in the entry yaml is force-loaded so the listener fires."""
_write(tmp_path, "wifi.yaml", "ssid: my_ssid\npassword: my_pw\n")
discovered = discover_user_yaml_files(_write_entry_including(tmp_path, "wifi.yaml"))
names = {p.name for p in discovered.files}
assert names == {"entry.yaml", "wifi.yaml"}
assert discovered.secrets == set()
@pytest.mark.parametrize(
"secret_name",
[
pytest.param("secrets.yaml", id="yaml"),
pytest.param("secrets.yml", id="yml"),
],
)
def test_discover_user_yaml_files_flags_secrets_filename(
tmp_path: Path, secret_name: str
) -> None:
"""Both `secrets.yaml` and `secrets.yml` get flagged in `.secrets`."""
_write(tmp_path, secret_name, "key: value\n")
discovered = discover_user_yaml_files(_write_entry_including(tmp_path, secret_name))
assert (tmp_path / secret_name).resolve() in discovered.secrets
def test_discover_user_yaml_files_flags_secrets_symlink(tmp_path: Path) -> None:
"""`secrets.yaml` symlinked to a non-secrets-named target is still flagged
because the un-resolved basename is what gets recorded."""
target = _write(tmp_path, "real_creds.yaml", "key: value\n")
(tmp_path / "secrets.yaml").symlink_to(target)
discovered = discover_user_yaml_files(
_write_entry_including(tmp_path, "secrets.yaml")
)
# The recorded "secret path" is the resolved target — even though its
# basename is `real_creds.yaml`, it's still in `.secrets`.
assert target.resolve() in discovered.secrets
def test_discover_user_yaml_files_swallows_parse_errors(tmp_path: Path) -> None:
"""A YAML parse failure returns whatever was tracked so far without raising."""
entry = _write(tmp_path, "entry.yaml", "esphome: [unterminated\n")
discovered = discover_user_yaml_files(entry)
assert isinstance(discovered, DiscoveredYamlFiles)
def test_discover_user_yaml_files_deduplicates(tmp_path: Path) -> None:
"""The same file referenced twice appears once in `.files`."""
_write(tmp_path, "wifi.yaml", "ssid: a\n")
entry = _write(
tmp_path,
"entry.yaml",
"esphome:\n name: test\nwifi: !include wifi.yaml\nfoo: !include wifi.yaml\n",
)
discovered = discover_user_yaml_files(entry)
wifi_resolved = (tmp_path / "wifi.yaml").resolve()
assert discovered.files.count(wifi_resolved) == 1
def test_track_yaml_loads_records_resolved_paths(tmp_path: Path) -> None:
"""`track_yaml_loads` is the building block — sanity-check it resolves
symlinks so callers can dedupe by identity."""
target = _write(tmp_path, "actual.yaml", "esphome:\n name: t\n")
link = tmp_path / "alias.yaml"
link.symlink_to(target)
with track_yaml_loads() as loaded:
yaml_util.load_yaml(link)
assert target.resolve() in loaded