mirror of
https://github.com/esphome/esphome.git
synced 2026-06-24 14:19:03 +00:00
[store_yaml] Refactor YAML discovery to share bundle.py's approach
The previous implementation extended `track_yaml_loads` across the entire validation pass to catch deferred `!include` and package loads, but that also captured framework YAML loaded internally by component validators (e.g. LVGL's `hello_world.yaml`) and produced spurious "unresolved substitution" warnings during the pre-validation force-load. bundle.py already had the right pattern: a fresh post-validation re-parse plus `force_load_include_files`. - Lift the discovery into `yaml_util.discover_user_yaml_files` and have both `bundle.py` and `config.py` use it (DRY). - Capture `secrets.yaml` / `secrets.yml` by the *un-resolved* listener fname so a `secrets.yaml` symlinked to a non-secrets-named target is still flagged for redaction. - Add a `warn_on_unresolved` flag to `force_load_include_files` so the discovery path (where substitutions haven't run) logs at debug instead of warning. - Reject `store_yaml` configs with an unencrypted API via `FINAL_VALIDATE_SCHEMA`; an explicit `allow_unencrypted: true` opt-out keeps lab setups working (renamed from `allow_unencrypted_api` so the integration-test harness's naive `api:` string replacement doesn't clobber it). - Annotate `store_yaml_chunk_buf` with the `cppcoreguidelines-avoid-non-const-global-variables` suppression that matches other intentional API-side globals. - Update unit tests to exercise the new `DiscoveredYamlFiles` shape plus a symlink-secrets case.
This commit is contained in:
@@ -1175,7 +1175,11 @@ void APIConnection::on_camera_image_request(const CameraImageRequest &msg) {
|
||||
// Chunk size per GetYamlResponse. Small enough to leave room for the protobuf frame
|
||||
// inside the 65535-byte API limit and friendly to TCP MSS.
|
||||
static constexpr size_t STORE_YAML_CHUNK_SIZE = 512;
|
||||
// Scratch buffer used to copy a chunk from PROGMEM (needed on ESP8266; harmless elsewhere).
|
||||
// Scratch buffer used to copy a chunk from PROGMEM (needed on ESP8266; harmless
|
||||
// elsewhere). Shared across connections is safe because the API loop is
|
||||
// single-threaded and each chunk is filled and consumed atomically inside one
|
||||
// `try_send_store_yaml_` iteration.
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
static uint8_t store_yaml_chunk_buf[STORE_YAML_CHUNK_SIZE];
|
||||
|
||||
void APIConnection::on_get_yaml_request() {
|
||||
|
||||
@@ -8,8 +8,9 @@ from types import ModuleType
|
||||
|
||||
import esphome.codegen as cg
|
||||
import esphome.config_validation as cv
|
||||
from esphome.const import CONF_ID, CONF_RAW_DATA_ID, SECRETS_FILES
|
||||
from esphome.const import CONF_API, CONF_ID, CONF_RAW_DATA_ID
|
||||
from esphome.core import CORE, EsphomeError, HexInt
|
||||
import esphome.final_validate as fv
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
@@ -17,6 +18,10 @@ CODEOWNERS = ["@bdraco"]
|
||||
DEPENDENCIES = ["api"]
|
||||
|
||||
CONF_INCLUDE_SECRETS = "include_secrets"
|
||||
# Avoid an `_api:` substring in the key name so the integration-test harness
|
||||
# (which naively str-replaces `api:` to inject a port directive) doesn't
|
||||
# clobber configs that opt into this escape hatch.
|
||||
CONF_ALLOW_UNENCRYPTED = "allow_unencrypted"
|
||||
|
||||
store_yaml_ns = cg.esphome_ns.namespace("store_yaml")
|
||||
StoreYamlComponent = store_yaml_ns.class_("StoreYamlComponent", cg.Component)
|
||||
@@ -33,10 +38,38 @@ CONFIG_SCHEMA = cv.Schema(
|
||||
cv.GenerateID(): cv.declare_id(StoreYamlComponent),
|
||||
cv.GenerateID(CONF_RAW_DATA_ID): cv.declare_id(cg.uint8),
|
||||
cv.Optional(CONF_INCLUDE_SECRETS, default=False): cv.boolean,
|
||||
cv.Optional(CONF_ALLOW_UNENCRYPTED, default=False): cv.boolean,
|
||||
}
|
||||
).extend(cv.COMPONENT_SCHEMA)
|
||||
|
||||
|
||||
def _final_validate(config):
|
||||
"""Require API encryption: an unauthenticated client could otherwise pull
|
||||
the embedded YAML (which may include Wi-Fi credentials or opted-in
|
||||
secrets). The escape hatch ``allow_unencrypted_api: true`` exists for
|
||||
isolated lab setups where the user has accepted the trade-off."""
|
||||
full = fv.full_config.get()
|
||||
api_conf = full.get(CONF_API, {})
|
||||
if api_conf.get("encryption"):
|
||||
return config
|
||||
if config.get(CONF_ALLOW_UNENCRYPTED):
|
||||
_LOGGER.warning(
|
||||
"store_yaml is enabled without API encryption; any client that can "
|
||||
"reach the device on the network can pull the embedded YAML."
|
||||
)
|
||||
return config
|
||||
raise cv.Invalid(
|
||||
"store_yaml requires API encryption (configure `api.encryption.key`). "
|
||||
"Without encryption, the embedded YAML — which may contain Wi-Fi "
|
||||
"credentials or opted-in secrets — can be read by any client that "
|
||||
"reaches the device. Set `store_yaml.allow_unencrypted: true` to "
|
||||
"override after acknowledging the risk."
|
||||
)
|
||||
|
||||
|
||||
FINAL_VALIDATE_SCHEMA = _final_validate
|
||||
|
||||
|
||||
def _import_zstd() -> ModuleType:
|
||||
try:
|
||||
from compression import zstd # noqa: PLC0415 — Python 3.14+ stdlib
|
||||
@@ -53,8 +86,8 @@ def _import_zstd() -> ModuleType:
|
||||
|
||||
def _gather_files(include_secrets: bool) -> list[tuple[str, bytes]]:
|
||||
"""Read each YAML file the config loader touched, return (relative_path, content) pairs."""
|
||||
sources = CORE.data.get("yaml_sources")
|
||||
if not sources:
|
||||
discovered = CORE.data.get("yaml_sources")
|
||||
if not discovered or not discovered.files:
|
||||
raise EsphomeError(
|
||||
"store_yaml could not find any tracked YAML files; the config loader "
|
||||
"did not populate CORE.data['yaml_sources']."
|
||||
@@ -62,19 +95,14 @@ def _gather_files(include_secrets: bool) -> list[tuple[str, bytes]]:
|
||||
|
||||
config_path = Path(CORE.config_path).resolve()
|
||||
root = config_path.parent
|
||||
secret_paths = discovered.secrets
|
||||
|
||||
seen: set[Path] = set()
|
||||
files: list[tuple[str, bytes]] = []
|
||||
# ``track_yaml_loads`` already returns resolved Path objects, so no extra
|
||||
# ``.resolve()`` work is needed here. The symlink edge case (``secrets.yaml``
|
||||
# symlinked to a non-secrets filename) is not detectable at this layer
|
||||
# because the original path is lost by the resolver upstream.
|
||||
for path in sources:
|
||||
if path in seen:
|
||||
continue
|
||||
seen.add(path)
|
||||
|
||||
if path.name in SECRETS_FILES and not include_secrets:
|
||||
for path in discovered.files:
|
||||
# `secret_paths` was collected from the *un-resolved* basename, so a
|
||||
# `secrets.yaml` symlinked to a differently-named target is still
|
||||
# treated as secrets here.
|
||||
if path in secret_paths and not include_secrets:
|
||||
content = REDACTED_PLACEHOLDER
|
||||
else:
|
||||
try:
|
||||
|
||||
@@ -1191,33 +1191,28 @@ def _load_config(
|
||||
command_line_substitutions: dict[str, Any], skip_external_update: bool = False
|
||||
) -> Config:
|
||||
"""Load the configuration file."""
|
||||
# Keep the file-load listener active across both the YAML parse and the
|
||||
# validation pass. Substitution and packages resolve deferred `!include`
|
||||
# references during validation (and remote packages download YAML on
|
||||
# demand), so the listener must still be installed when those secondary
|
||||
# loads happen. Components that want the on-disk YAML at codegen time
|
||||
# (e.g. store_yaml for firmware recovery) read the list out of
|
||||
# CORE.data["yaml_sources"] after a successful validation.
|
||||
with yaml_util.track_yaml_loads() as loaded_files:
|
||||
try:
|
||||
config = yaml_util.load_yaml(CORE.config_path)
|
||||
# Resolve any deferred `!include`/package references whose paths
|
||||
# don't depend on substitutions, so they're captured here too.
|
||||
yaml_util.force_load_include_files(config)
|
||||
except EsphomeError as e:
|
||||
raise InvalidYAMLError(e) from e
|
||||
try:
|
||||
config = yaml_util.load_yaml(CORE.config_path)
|
||||
except EsphomeError as e:
|
||||
raise InvalidYAMLError(e) from e
|
||||
|
||||
try:
|
||||
result = validate_config(
|
||||
config, command_line_substitutions, skip_external_update
|
||||
)
|
||||
except EsphomeError:
|
||||
raise
|
||||
except Exception:
|
||||
_LOGGER.error("Unexpected exception while reading configuration:")
|
||||
raise
|
||||
try:
|
||||
result = validate_config(
|
||||
config, command_line_substitutions, skip_external_update
|
||||
)
|
||||
except EsphomeError:
|
||||
raise
|
||||
except Exception:
|
||||
_LOGGER.error("Unexpected exception while reading configuration:")
|
||||
raise
|
||||
|
||||
CORE.data["yaml_sources"] = loaded_files
|
||||
# Discover the user's on-disk YAML files via a fresh re-parse — same
|
||||
# pattern bundle.py uses. Doing it post-validation (rather than keeping a
|
||||
# listener installed across validation) avoids capturing framework YAML
|
||||
# that components load internally (e.g. LVGL's `hello_world.yaml`). The
|
||||
# result is consumed by components like store_yaml that want to embed the
|
||||
# user's configuration in firmware for recovery.
|
||||
CORE.data["yaml_sources"] = yaml_util.discover_user_yaml_files(CORE.config_path)
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
api:
|
||||
|
||||
store_yaml:
|
||||
allow_unencrypted: true
|
||||
|
||||
@@ -11,3 +11,4 @@ logger:
|
||||
api:
|
||||
|
||||
store_yaml:
|
||||
allow_unencrypted: true
|
||||
|
||||
@@ -14,6 +14,7 @@ from esphome.components.store_yaml import (
|
||||
_pack_envelope,
|
||||
)
|
||||
from esphome.core import CORE, EsphomeError
|
||||
from esphome.yaml_util import DiscoveredYamlFiles
|
||||
|
||||
|
||||
def _unpack_envelope(blob: bytes) -> dict[str, bytes]:
|
||||
@@ -57,13 +58,21 @@ def _reset_core() -> None:
|
||||
CORE.config_path = None
|
||||
|
||||
|
||||
def _set_sources(project_dir: Path, *names: str) -> None:
|
||||
def _set_sources(project_dir: Path, *names: str, secrets: tuple[str, ...] = ()) -> None:
|
||||
CORE.config_path = project_dir / "entry.yaml"
|
||||
CORE.data["yaml_sources"] = [project_dir / name for name in names]
|
||||
files = [project_dir / name for name in names]
|
||||
secret_paths = {(project_dir / name).resolve() for name in secrets}
|
||||
CORE.data["yaml_sources"] = DiscoveredYamlFiles(files, secret_paths)
|
||||
|
||||
|
||||
def test_gather_redacts_secrets_by_default(project: Path) -> None:
|
||||
_set_sources(project, "entry.yaml", "wifi.yaml", "secrets.yaml")
|
||||
_set_sources(
|
||||
project,
|
||||
"entry.yaml",
|
||||
"wifi.yaml",
|
||||
"secrets.yaml",
|
||||
secrets=("secrets.yaml",),
|
||||
)
|
||||
files = dict(_gather_files(include_secrets=False))
|
||||
assert files["secrets.yaml"] == REDACTED_PLACEHOLDER
|
||||
assert b"SUPER_SECRET" not in files["secrets.yaml"]
|
||||
@@ -73,13 +82,33 @@ def test_gather_redacts_secrets_by_default(project: Path) -> None:
|
||||
def test_gather_redacts_yml_extension(project: Path) -> None:
|
||||
yml = project / "secrets.yml"
|
||||
yml.write_text("api_key: OTHER_SECRET\n")
|
||||
_set_sources(project, "entry.yaml", "secrets.yml")
|
||||
_set_sources(project, "entry.yaml", "secrets.yml", secrets=("secrets.yml",))
|
||||
files = dict(_gather_files(include_secrets=False))
|
||||
assert files["secrets.yml"] == REDACTED_PLACEHOLDER
|
||||
|
||||
|
||||
def test_gather_redacts_secret_symlinked_to_other_name(
|
||||
project: Path, tmp_path: Path
|
||||
) -> None:
|
||||
"""A `secrets.yaml` symlinked to a non-secrets-named target is still redacted
|
||||
because the un-resolved basename was captured upstream."""
|
||||
target = tmp_path / "actual_creds.yaml"
|
||||
target.write_text("api_key: FROM_SYMLINK\n")
|
||||
link = project / "secrets.yaml"
|
||||
link.unlink() # remove the regular file laid down by the fixture
|
||||
link.symlink_to(target)
|
||||
# Discovery records the un-resolved listener fname under SECRETS_FILES
|
||||
# but stores the resolved path; mimic that here.
|
||||
resolved = link.resolve()
|
||||
CORE.config_path = project / "entry.yaml"
|
||||
CORE.data["yaml_sources"] = DiscoveredYamlFiles([resolved], {resolved})
|
||||
files = dict(_gather_files(include_secrets=False))
|
||||
assert REDACTED_PLACEHOLDER in files.values()
|
||||
assert b"FROM_SYMLINK" not in b"".join(files.values())
|
||||
|
||||
|
||||
def test_gather_embeds_secrets_when_opted_in(project: Path) -> None:
|
||||
_set_sources(project, "entry.yaml", "secrets.yaml")
|
||||
_set_sources(project, "entry.yaml", "secrets.yaml", secrets=("secrets.yaml",))
|
||||
files = dict(_gather_files(include_secrets=True))
|
||||
assert b"SUPER_SECRET" in files["secrets.yaml"]
|
||||
|
||||
@@ -90,21 +119,16 @@ def test_gather_uses_relative_path_for_external_files(
|
||||
"""Files outside the project root use a ``..``-style relative path so they don't collide."""
|
||||
sibling = tmp_path / "outside.yaml"
|
||||
sibling.write_text("foo: bar\n")
|
||||
_set_sources(project, "entry.yaml")
|
||||
CORE.data["yaml_sources"].append(sibling)
|
||||
CORE.config_path = project / "entry.yaml"
|
||||
CORE.data["yaml_sources"] = DiscoveredYamlFiles(
|
||||
[project / "entry.yaml", sibling], set()
|
||||
)
|
||||
files = dict(_gather_files(include_secrets=False))
|
||||
# project root is `tmp_path/project`, sibling is in `tmp_path` so it
|
||||
# resolves to `../outside.yaml`.
|
||||
assert "../outside.yaml" in files
|
||||
|
||||
|
||||
def test_gather_deduplicates(project: Path) -> None:
|
||||
_set_sources(project, "entry.yaml", "wifi.yaml", "wifi.yaml")
|
||||
files = _gather_files(include_secrets=False)
|
||||
paths = [p for p, _ in files]
|
||||
assert paths.count("wifi.yaml") == 1
|
||||
|
||||
|
||||
def test_gather_raises_when_no_sources(project: Path) -> None:
|
||||
CORE.config_path = project / "entry.yaml"
|
||||
with pytest.raises(EsphomeError):
|
||||
|
||||
Reference in New Issue
Block a user