diff --git a/esphome/bundle.py b/esphome/bundle.py index 70c4fad0fd..4537cbce9d 100644 --- a/esphome/bundle.py +++ b/esphome/bundle.py @@ -260,42 +260,20 @@ class ConfigBundleCreator: def _discover_yaml_includes(self) -> None: """Discover YAML files loaded during config parsing. - Deliberately uses a fresh re-parse and force-loads every deferred - ``IncludeFile`` to include *all* potentially-reachable includes, - even branches not selected by the local substitutions. Bundles are - meant to be compiled on another system where command-line - substitution overrides may choose a different branch — e.g. - ``!include network/${eth_model}/config.yaml`` must ship every - candidate so the remote build can pick any one. - - Entries with unresolved substitution variables in the filename - path are skipped with a warning (they cannot be resolved without - the substitution pass). - - Secrets files are tracked separately so we can filter them to - only include the keys this config actually references. + Delegates to :func:`yaml_util.discover_user_yaml_files`, which does a + fresh re-parse and force-loads every deferred ``IncludeFile`` so that + *all* potentially-reachable includes are captured (even branches not + selected by local substitutions). Bundles are meant to be compiled on + another system where command-line substitution overrides may choose a + different branch — e.g. ``!include network/${eth_model}/config.yaml`` + must ship every candidate so the remote build can pick any one. """ - # Must be a fresh parse: IncludeFile.load() caches its result in - # _content, and we discover files by listening for loader calls. On - # an already-parsed tree the cache is populated, .load() returns - # without calling the loader, the listener never fires, and the - # referenced files would be silently dropped from the bundle. - with yaml_util.track_yaml_loads() as loaded_files: - try: - data = yaml_util.load_yaml(self._config_path) - except EsphomeError: - _LOGGER.debug( - "Bundle: re-loading YAML for include discovery failed, " - "proceeding with partial file list" - ) - else: - _force_load_include_files(data) - - for fpath in loaded_files: - if fpath == self._config_path.resolve(): + discovered = yaml_util.discover_user_yaml_files(self._config_path) + self._secrets_paths.update(discovered.secrets) + config_resolved = self._config_path.resolve() + for fpath in discovered.files: + if fpath == config_resolved: continue # Already added as config - if fpath.name in const.SECRETS_FILES: - self._secrets_paths.add(fpath) self._add_file(fpath) def _discover_component_files(self) -> None: @@ -625,57 +603,6 @@ def _add_bytes_to_tar(tar: tarfile.TarFile, name: str, data: bytes) -> None: tar.addfile(info, io.BytesIO(data)) -def _force_load_include_files(obj: Any, _seen: set[int] | None = None) -> None: - """Recursively resolve any ``IncludeFile`` instances in a YAML tree. - - Nested ``!include`` returns a deferred ``IncludeFile`` that is only - resolved during the substitution pass. During bundle discovery we need - the referenced files to actually load so the ``track_yaml_loads`` - listener fires for them. - - ``IncludeFile`` instances with unresolved substitution variables in the - filename cannot be loaded — we skip and warn about those. - """ - if _seen is None: - _seen = set() - - if isinstance(obj, yaml_util.IncludeFile): - if id(obj) in _seen: - return - _seen.add(id(obj)) - if obj.has_unresolved_expressions(): - _LOGGER.warning( - "Bundle: cannot resolve !include %s (referenced from %s) " - "with substitutions in path", - obj.file, - obj.parent_file, - ) - return - try: - loaded = obj.load() - except EsphomeError as err: - _LOGGER.warning( - "Bundle: failed to load !include %s (referenced from %s): %s", - obj.file, - obj.parent_file, - err, - ) - return - _force_load_include_files(loaded, _seen) - elif isinstance(obj, dict): - if id(obj) in _seen: - return - _seen.add(id(obj)) - for value in obj.values(): - _force_load_include_files(value, _seen) - elif isinstance(obj, (list, tuple)): - if id(obj) in _seen: - return - _seen.add(id(obj)) - for item in obj: - _force_load_include_files(item, _seen) - - def _resolve_include_path(include_path: Any) -> Path | None: """Resolve an include path to absolute, skipping system includes.""" if isinstance(include_path, str) and include_path.startswith("<"): diff --git a/esphome/yaml_util.py b/esphome/yaml_util.py index b153d160a7..b56d024418 100644 --- a/esphome/yaml_util.py +++ b/esphome/yaml_util.py @@ -2,6 +2,7 @@ from __future__ import annotations from collections.abc import Callable, Generator from contextlib import contextmanager, suppress +from dataclasses import dataclass, field import functools import inspect from io import BytesIO, TextIOBase, TextIOWrapper @@ -233,6 +234,130 @@ class IncludeFile: return has_substitution_or_expression(str(self.file)) +def force_load_include_files( + obj: Any, + *, + warn_on_unresolved: bool = True, + _seen: set[int] | None = None, +) -> None: + """Recursively resolve any deferred ``IncludeFile`` instances in a YAML tree. + + Nested ``!include`` returns a deferred ``IncludeFile`` that is only resolved + later (substitution / packages pass). Callers that need every referenced + file to actually load — bundle discovery, on-device YAML recovery — invoke + this while a :func:`track_yaml_loads` listener is active so the underlying + loader fires and records every reachable file. + + ``IncludeFile`` instances whose path contains unresolved substitution + variables cannot be loaded. By default a warning is logged for each one; + pass ``warn_on_unresolved=False`` (used by discovery paths that run on a + fresh re-parse where substitutions haven't been applied yet) to demote it + to a debug log. + """ + if _seen is None: + _seen = set() + + if isinstance(obj, IncludeFile): + if id(obj) in _seen: + return + _seen.add(id(obj)) + if obj.has_unresolved_expressions(): + log = _LOGGER.warning if warn_on_unresolved else _LOGGER.debug + log( + "Cannot resolve !include %s (referenced from %s) with substitutions in path", + obj.file, + obj.parent_file, + ) + return + try: + loaded = obj.load() + except EsphomeError as err: + _LOGGER.warning( + "Failed to load !include %s (referenced from %s): %s", + obj.file, + obj.parent_file, + err, + ) + return + force_load_include_files( + loaded, warn_on_unresolved=warn_on_unresolved, _seen=_seen + ) + elif isinstance(obj, dict): + if id(obj) in _seen: + return + _seen.add(id(obj)) + for value in obj.values(): + force_load_include_files( + value, warn_on_unresolved=warn_on_unresolved, _seen=_seen + ) + elif isinstance(obj, (list, tuple)): + if id(obj) in _seen: + return + _seen.add(id(obj)) + for item in obj: + force_load_include_files( + item, warn_on_unresolved=warn_on_unresolved, _seen=_seen + ) + + +@dataclass(slots=True) +class DiscoveredYamlFiles: + """Result of :func:`discover_user_yaml_files`. + + ``files`` contains every resolved path the YAML loader touched while we + were re-parsing the user's config; ``secrets`` is the subset whose + *un-resolved* filename matched :data:`esphome.const.SECRETS_FILES` (so + a ``secrets.yaml`` symlinked to a differently-named target is still + flagged as secrets). + """ + + files: list[Path] = field(default_factory=list) + secrets: set[Path] = field(default_factory=set) + + +def discover_user_yaml_files(config_path: Path) -> DiscoveredYamlFiles: + """Fresh-re-parse ``config_path`` and report every file the YAML loader + pulled in, plus which of them came in under a secrets filename. + + Does NOT run schema validation, substitutions, or package resolution — so + component-internal YAML loaded by validators (LVGL helpers, dashboard + imports, etc.) is *not* captured. Deferred ``!include`` references whose + paths don't depend on substitutions are force-loaded here so they're + captured too. + + Must run on a fresh parse because :meth:`IncludeFile.load` caches its + result; on an already-resolved tree :meth:`load` returns without invoking + the loader and the listener would not fire for the referenced files. + """ + from esphome.const import SECRETS_FILES + + secrets: set[Path] = set() + + def _capture_secret(fname: Path) -> None: + if Path(fname).name in SECRETS_FILES: + secrets.add(Path(fname).resolve()) + + with track_yaml_loads() as loaded: + _load_listeners.append(_capture_secret) + try: + try: + data = load_yaml(config_path) + except EsphomeError: + return DiscoveredYamlFiles(list(loaded), secrets) + force_load_include_files(data, warn_on_unresolved=False) + finally: + _load_listeners.remove(_capture_secret) + + # Deduplicate while preserving first-seen order. + seen: set[Path] = set() + unique: list[Path] = [] + for path in loaded: + if path not in seen: + seen.add(path) + unique.append(path) + return DiscoveredYamlFiles(unique, secrets) + + def _add_data_ref(fn): @functools.wraps(fn) def wrapped(loader, node): diff --git a/tests/unit_tests/test_bundle.py b/tests/unit_tests/test_bundle.py index 5d046252da..f15bbf2e29 100644 --- a/tests/unit_tests/test_bundle.py +++ b/tests/unit_tests/test_bundle.py @@ -22,13 +22,13 @@ from esphome.bundle import ( _add_bytes_to_tar, _default_target_dir, _find_used_secret_keys, - _force_load_include_files, extract_bundle, is_bundle_path, prepare_bundle_for_compile, read_bundle_manifest, ) from esphome.core import CORE, EsphomeError +from esphome.yaml_util import force_load_include_files # --------------------------------------------------------------------------- # Helpers @@ -947,7 +947,7 @@ def test_discover_files_nested_include_load_failure( paths = [f.path for f in files] assert "test.yaml" in paths assert any( - "failed to load !include" in r.message and "missing.yaml" in r.message + "failed to load !include" in r.message.lower() and "missing.yaml" in r.message for r in caplog.records ) @@ -974,8 +974,8 @@ def test_force_load_skips_duplicate_include_file() -> None: # Same instance appears twice — second visit must hit the _seen guard. tree = {"a": stub, "b": [stub]} - with patch("esphome.bundle.yaml_util.IncludeFile", _StubInclude): - _force_load_include_files(tree) + with patch("esphome.yaml_util.IncludeFile", _StubInclude): + force_load_include_files(tree) assert stub.load_calls == 1 @@ -989,8 +989,8 @@ def test_force_load_handles_cyclic_containers() -> None: cyclic_list.append(cyclic_list) # Should return without recursing forever - _force_load_include_files(cyclic_dict) - _force_load_include_files(cyclic_list) + force_load_include_files(cyclic_dict) + force_load_include_files(cyclic_list) def test_discover_files_yaml_reload_failure( diff --git a/tests/unit_tests/test_yaml_util.py b/tests/unit_tests/test_yaml_util.py index ace92fbf6f..e97a188be4 100644 --- a/tests/unit_tests/test_yaml_util.py +++ b/tests/unit_tests/test_yaml_util.py @@ -12,11 +12,15 @@ import esphome.config_validation as cv from esphome.core import DocumentLocation, DocumentRange, EsphomeError from esphome.util import OrderedDict from esphome.yaml_util import ( + DiscoveredYamlFiles, ESPHomeDataBase, ESPLiteralValue, + discover_user_yaml_files, + force_load_include_files, format_path, make_data_base, make_literal, + track_yaml_loads, ) @@ -966,3 +970,215 @@ def test_make_literal_blocks_substitution() -> None: # undefined in the context. assert result == {"pin": "${PIN}"} assert isinstance(result, ESPLiteralValue) + + +# --------------------------------------------------------------------------- +# force_load_include_files / discover_user_yaml_files +# --------------------------------------------------------------------------- + + +class _StubInclude: + """Stand-in for `IncludeFile` that records how `load()` was called. + + Patched in via `esphome.yaml_util.IncludeFile` so the recursion in + `force_load_include_files` treats instances as deferred includes without + needing an actual on-disk file. + """ + + def __init__( + self, + file: str = "stub.yaml", + parent_file: Path | None = None, + *, + unresolved: bool = False, + load_result: object = None, + raise_on_load: EsphomeError | None = None, + ) -> None: + self.file = Path(file) + self.parent_file = parent_file or Path("/tmp/parent.yaml") + self._unresolved = unresolved + self._load_result = load_result if load_result is not None else {} + self._raise = raise_on_load + self.load_calls = 0 + + def has_unresolved_expressions(self) -> bool: + return self._unresolved + + def load(self) -> object: + self.load_calls += 1 + if self._raise is not None: + raise self._raise + return self._load_result + + +@pytest.fixture +def patch_include_file(): + """Replace `IncludeFile` with `_StubInclude` so isinstance checks in + `force_load_include_files` match the stubs constructed by tests.""" + with patch("esphome.yaml_util.IncludeFile", _StubInclude): + yield + + +def test_force_load_include_files_resolves_nested_includes( + patch_include_file: None, +) -> None: + """A tree of dict/list/IncludeFile is walked and every IncludeFile is loaded.""" + inner = _StubInclude("inner.yaml") + outer = _StubInclude("outer.yaml", load_result={"nested": inner}) + force_load_include_files([{"a": outer}, "scalar"]) + assert outer.load_calls == 1 + assert inner.load_calls == 1 + + +def test_force_load_include_files_seen_guard_prevents_double_load( + patch_include_file: None, +) -> None: + """The same IncludeFile referenced from two branches loads once.""" + stub = _StubInclude("once.yaml") + force_load_include_files({"a": stub, "b": [stub]}) + assert stub.load_calls == 1 + + +def test_force_load_include_files_handles_cyclic_containers() -> None: + """Cyclic dict/list references don't trigger infinite recursion.""" + cyclic_dict: dict[str, object] = {} + cyclic_dict["self"] = cyclic_dict + cyclic_list: list[object] = [] + cyclic_list.append(cyclic_list) + # Both calls must return without recursing forever. + force_load_include_files(cyclic_dict) + force_load_include_files(cyclic_list) + + +@pytest.mark.parametrize( + ("warn_on_unresolved", "expect_level"), + [ + pytest.param(True, "WARNING", id="default-warns"), + pytest.param(False, "DEBUG", id="opt-in-demotes"), + ], +) +def test_force_load_include_files_unresolved_log_level( + patch_include_file: None, + caplog: pytest.LogCaptureFixture, + warn_on_unresolved: bool, + expect_level: str, +) -> None: + """Substitution-templated include paths skip the load and log at the + level chosen by `warn_on_unresolved`.""" + stub = _StubInclude("${var}.yaml", unresolved=True) + with caplog.at_level("DEBUG", logger="esphome.yaml_util"): + force_load_include_files({"k": stub}, warn_on_unresolved=warn_on_unresolved) + assert stub.load_calls == 0 + matching = [ + r.levelname for r in caplog.records if "Cannot resolve !include" in r.message + ] + assert matching == [expect_level] + + +def test_force_load_include_files_warns_on_load_failure( + patch_include_file: None, + caplog: pytest.LogCaptureFixture, +) -> None: + """An `EsphomeError` raised by `load()` is caught and logged, not propagated.""" + stub = _StubInclude("missing.yaml", raise_on_load=EsphomeError("boom")) + with caplog.at_level("WARNING", logger="esphome.yaml_util"): + force_load_include_files({"k": stub}) + assert any( + "Failed to load !include" in r.message and "missing.yaml" in r.message + for r in caplog.records + ) + + +def test_discovered_yaml_files_holds_files_and_secrets() -> None: + """`DiscoveredYamlFiles` is a small data carrier; both fields are mandatory.""" + files = [Path("/tmp/a.yaml")] + secrets = {Path("/tmp/a.yaml")} + discovered = DiscoveredYamlFiles(files, secrets) + assert discovered.files is files + assert discovered.secrets is secrets + + +def _write(tmp_path: Path, name: str, content: str) -> Path: + """Write `content` to `tmp_path/name`, creating parent dirs as needed.""" + path = tmp_path / name + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) + return path + + +def _write_entry_including(tmp_path: Path, included_name: str) -> Path: + """Write a minimal entry yaml that `!include`s `included_name`.""" + return _write( + tmp_path, + "entry.yaml", + f"esphome:\n name: test\nwifi: !include {included_name}\n", + ) + + +def test_discover_user_yaml_files_captures_includes(tmp_path: Path) -> None: + """A `!include` in the entry yaml is force-loaded so the listener fires.""" + _write(tmp_path, "wifi.yaml", "ssid: my_ssid\npassword: my_pw\n") + discovered = discover_user_yaml_files(_write_entry_including(tmp_path, "wifi.yaml")) + names = {p.name for p in discovered.files} + assert names == {"entry.yaml", "wifi.yaml"} + assert discovered.secrets == set() + + +@pytest.mark.parametrize( + "secret_name", + [ + pytest.param("secrets.yaml", id="yaml"), + pytest.param("secrets.yml", id="yml"), + ], +) +def test_discover_user_yaml_files_flags_secrets_filename( + tmp_path: Path, secret_name: str +) -> None: + """Both `secrets.yaml` and `secrets.yml` get flagged in `.secrets`.""" + _write(tmp_path, secret_name, "key: value\n") + discovered = discover_user_yaml_files(_write_entry_including(tmp_path, secret_name)) + assert (tmp_path / secret_name).resolve() in discovered.secrets + + +def test_discover_user_yaml_files_flags_secrets_symlink(tmp_path: Path) -> None: + """`secrets.yaml` symlinked to a non-secrets-named target is still flagged + because the un-resolved basename is what gets recorded.""" + target = _write(tmp_path, "real_creds.yaml", "key: value\n") + (tmp_path / "secrets.yaml").symlink_to(target) + discovered = discover_user_yaml_files( + _write_entry_including(tmp_path, "secrets.yaml") + ) + # The recorded "secret path" is the resolved target — even though its + # basename is `real_creds.yaml`, it's still in `.secrets`. + assert target.resolve() in discovered.secrets + + +def test_discover_user_yaml_files_swallows_parse_errors(tmp_path: Path) -> None: + """A YAML parse failure returns whatever was tracked so far without raising.""" + entry = _write(tmp_path, "entry.yaml", "esphome: [unterminated\n") + discovered = discover_user_yaml_files(entry) + assert isinstance(discovered, DiscoveredYamlFiles) + + +def test_discover_user_yaml_files_deduplicates(tmp_path: Path) -> None: + """The same file referenced twice appears once in `.files`.""" + _write(tmp_path, "wifi.yaml", "ssid: a\n") + entry = _write( + tmp_path, + "entry.yaml", + "esphome:\n name: test\nwifi: !include wifi.yaml\nfoo: !include wifi.yaml\n", + ) + discovered = discover_user_yaml_files(entry) + wifi_resolved = (tmp_path / "wifi.yaml").resolve() + assert discovered.files.count(wifi_resolved) == 1 + + +def test_track_yaml_loads_records_resolved_paths(tmp_path: Path) -> None: + """`track_yaml_loads` is the building block — sanity-check it resolves + symlinks so callers can dedupe by identity.""" + target = _write(tmp_path, "actual.yaml", "esphome:\n name: t\n") + link = tmp_path / "alias.yaml" + link.symlink_to(target) + with track_yaml_loads() as loaded: + yaml_util.load_yaml(link) + assert target.resolve() in loaded