[core] Skip !secret and !lambda in legacy fallback regex

Address reviewer feedback:

- legacy regex was wrapping password: !secret name and clobbering
  the dumper's user-friendly !secret round-trip; extend the negative
  lookahead to skip !secret (and !lambda) values entirely
- drop the in-replacement !lambda check now that the lookahead handles it
- reword the thread-safety claim in dump() since _SECRET_VALUES /
  _SECRET_CACHE remain module globals
- reword the SensitiveStr representer registration comment to reflect
  PyYAML's MRO-walked dispatch rather than registration order
- tighten the legacy regex comment

Adds tests for the !secret and !lambda skip paths.
This commit is contained in:
J. Nick Koston
2026-05-26 22:23:40 -05:00
parent 6d689e8297
commit bc2a811568
3 changed files with 30 additions and 32 deletions

View File

@@ -1420,19 +1420,15 @@ def command_config(args: ArgsProtocol, config: ConfigType) -> int | None:
return 0
# Legacy substring redaction fallback. Catches sensitive-looking fields that
# aren't yet tagged with ``cv.sensitive(...)`` so config dumps don't regress
# for unmigrated/third-party schemas. Each match also logs a one-time
# deprecation warning naming the field; this fallback is slated for removal
# in 2026.12.0 once canonical sensitive fields are migrated.
#
# The negative lookahead ``(?!\\033\[8m)`` skips values already wrapped by the
# SensitiveStr representer, so explicit tagging silences the warning. No
# trailing ``\w*`` after the fragment so the fragment must end the field
# name (preserves the prior regex's matching scope while letting the
# leading ``\w*`` capture the full field name for the warning).
# Legacy substring redaction fallback for unmigrated schemas; removed in
# 2026.12.0 once canonical sensitive fields are tagged. The lookahead skips
# values that already render themselves: ``\033[8m`` (SensitiveStr wrap),
# ``!secret`` (preserves the user-friendly tag), ``!lambda`` (multi-line
# block; first line is structural). Un-anchored on purpose to match the
# prior regex; leading ``\w*`` captures the full field name for warnings.
_LEGACY_REDACTION_RE = re.compile(
r"(?P<key>\w*(?:password|key|psk|ssid))\: (?!\\033\[8m)(?P<val>.+)"
r"(?P<key>\w*(?:password|key|psk|ssid))\: "
r"(?!\\033\[8m|!secret\b|!lambda\b)(?P<val>.+)"
)
_LEGACY_REDACTION_REMOVAL = "2026.12.0"
@@ -1441,15 +1437,8 @@ def _redact_with_legacy_fallback(output: str) -> str:
unmarked: set[str] = set()
def _replace(m: re.Match[str]) -> str:
val = m.group("val")
# Lambda values in a ``cv.sensitive(cv.templatable(...))`` field still
# hit this branch because ``Lambda`` isn't a ``str`` and so doesn't
# carry the ``SensitiveStr`` tag. The field itself IS tagged; warning
# the author to add ``cv.sensitive`` would be misleading. Still wrap
# the first line so the user-visible output matches the prior regex.
if not val.startswith("!lambda"):
unmarked.add(m.group("key"))
return f"{m.group('key')}: \\033[8m{val}\\033[28m"
unmarked.add(m.group("key"))
return f"{m.group('key')}: \\033[8m{m.group('val')}\\033[28m"
output = _LEGACY_REDACTION_RE.sub(_replace, output)
for key in sorted(unmarked):

View File

@@ -819,9 +819,9 @@ def dump(dict_, show_secrets=False, sort_keys=False):
_SECRET_VALUES.clear()
_SECRET_CACHE.clear()
# Per-call subclass so the redaction flag lives on the dumper class itself,
# not in module-level mutable state. Cheap (one type object per call),
# encapsulated, and thread-safe by construction.
# Per-call subclass so the redaction flag doesn't leak across calls.
# (``_SECRET_VALUES`` / ``_SECRET_CACHE`` remain module globals; YAML
# processing is single-threaded today, so this isolates only the flag.)
class _Dumper(ESPHomeDumper):
_redact_sensitive = not show_secrets
@@ -1101,8 +1101,7 @@ ESPHomeDumper.add_multi_representer(
)
ESPHomeDumper.add_multi_representer(bool, ESPHomeDumper.represent_bool)
ESPHomeDumper.add_multi_representer(str, ESPHomeDumper.represent_stringify)
# Registered after ``str`` so ``add_multi_representer``'s MRO lookup prefers
# the more specific ``SensitiveStr`` representer over the bare-``str`` one.
# MRO-walked dispatch; SensitiveStr's own entry wins over the str one.
ESPHomeDumper.add_multi_representer(SensitiveStr, ESPHomeDumper.represent_sensitive)
ESPHomeDumper.add_multi_representer(int, ESPHomeDumper.represent_int)
ESPHomeDumper.add_multi_representer(float, ESPHomeDumper.represent_float)

View File

@@ -388,17 +388,27 @@ def test_redact_with_legacy_fallback__deduplicates_warnings(
assert len(password_warnings) == 1
def test_redact_with_legacy_fallback__suppresses_warning_for_lambda(
def test_redact_with_legacy_fallback__skips_lambda_values(
caplog: pytest.LogCaptureFixture,
) -> None:
"""Lambda values in cv.sensitive(cv.templatable(...)) fields hit the
legacy regex (Lambda isn't str so SensitiveStr tagging doesn't apply),
but the field IS tagged. The warning would mislead the author, so it's
suppressed; the first line still gets wrapped for visual parity."""
"""``!lambda`` first line is structural, body is unreachable by a
single-line regex anyway, and tagged fields shouldn't trigger a warning."""
text = ' ssid: !lambda |-\n return "x";\n'
with caplog.at_level(logging.WARNING, logger="esphome.__main__"):
out = _redact_with_legacy_fallback(text)
assert "ssid: \\033[8m!lambda |-\\033[28m" in out
assert out == text
assert not any("legacy substring" in rec.message for rec in caplog.records)
def test_redact_with_legacy_fallback__skips_secret_references(
caplog: pytest.LogCaptureFixture,
) -> None:
"""``!secret name`` is the dumper's user-friendly representation; the
name isn't the secret, so wrapping it would clobber the round-trip."""
text = " password: !secret wifi_password\n"
with caplog.at_level(logging.WARNING, logger="esphome.__main__"):
out = _redact_with_legacy_fallback(text)
assert out == text
assert not any("legacy substring" in rec.message for rec in caplog.records)