[tests] Fail component test merge on conflicting duplicate IDs (#16849)

This commit is contained in:
J. Nick Koston
2026-06-08 12:49:25 -05:00
committed by GitHub
parent 54c73bf1bc
commit 36e043debb
125 changed files with 836 additions and 372 deletions

View File

@@ -39,8 +39,13 @@ from helpers import BASE_BUS_COMPONENTS, is_validate_only_file
from esphome import yaml_util
from esphome.config_helpers import Extend, Remove
# Path to common bus configs
COMMON_BUS_PATH = Path("tests/test_build_components/common")
# Path to common bus configs (resolved relative to this file, not the CWD)
COMMON_BUS_PATH = (
Path(__file__).resolve().parent.parent
/ "tests"
/ "test_build_components"
/ "common"
)
# Package dependencies - maps packages to the packages they include
# When a component uses a package on the left, it automatically gets

View File

@@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""Fail when two component test fixtures define the same id with different content.
Component tests are merged and built in groups in CI (see
``script/merge_component_configs.py``). When two components declare the same id
under the same section but with different content, the merge keeps the first and
drops the rest, which can make a cross-reference resolve to an incompatible
entity (this is what broke the i2s_audio speaker tests). That only surfaces when
the two components happen to land in the same group, often in an unrelated PR
long after the duplicate was written.
This script is the complete, batch-independent guard: it scans every component's
``test.<platform>.yaml`` per platform and reports any id that is defined by more
than one component with differing content, so a collision fails the PR that
introduces it and names the exact id and components.
To stay byte-for-byte consistent with what the merge actually does (so the guard
never disagrees with the build), it reuses the merge's own helpers:
* ``prefix_substitutions_in_dict`` -- the merge prefixes every component's
substitution references with the component name before deduplicating, so e.g.
``pin: ${pin}`` in two components becomes ``${a_pin}`` and ``${b_pin}`` and
conflicts. We apply the same prefixing; otherwise a shared id whose only
difference is a substitution looks identical here but conflicts at merge time.
* ``deduplicate_by_id`` -- the actual merge comparison (including the
``INTENTIONALLY_SHARED_IDS`` allowlist for deliberately shared singletons such
as ``sntp_time``). We feed each shared id's prefixed items straight through it
and treat a raised ``ValueError`` as a conflict, so this check and the merge
can never diverge.
``packages:`` are left as opaque ``!include`` objects by the loader -- exactly as
the merge sees them at dedup time -- so package-provided bus ids (``i2c_bus`` ...)
are not compared here, matching the merge, which re-adds those packages once.
"""
from __future__ import annotations
from collections import defaultdict
from collections.abc import Iterator
from dataclasses import dataclass, field
from pathlib import Path
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
from esphome.core import EsphomeError # noqa: E402
from script.merge_component_configs import ( # noqa: E402
deduplicate_by_id,
load_yaml_file,
prepare_component_body,
)
# Resolved relative to this file (not the CWD) so the scan cannot silently cover
# nothing when run from a different directory.
TESTS_DIR = Path(__file__).resolve().parent.parent / "tests" / "components"
def _collect_ids(
data: object,
path: tuple[str, ...],
out: dict[tuple[tuple[str, ...], object], object],
) -> None:
"""Record (dict_path, id) -> item for id-bearing items in dict-reachable lists.
Keyed by the full dict path (not just the immediate key) so items under
different paths that happen to share a list key name are never compared. Only
lists reached purely through dict keys are recorded: once the merge
concatenates a list, items from different components live in separate elements,
so anything deeper is never compared across components (matching how
``merge_config`` combines bodies). Ids keep their original type so ``5`` and
``"5"`` stay distinct, exactly as ``deduplicate_by_id`` treats them; an
unhashable id (rare) falls back to its ``repr`` so it can still be grouped.
"""
if not isinstance(data, dict):
return
for key, value in data.items():
new_path = path + (key,)
if isinstance(value, list):
for item in value:
if isinstance(item, dict) and "id" in item:
item_id = item["id"]
try:
hash(item_id)
except TypeError:
item_id = repr(item_id)
out[(new_path, item_id)] = item
elif isinstance(value, dict):
_collect_ids(value, new_path, out)
def _discover_platforms() -> set[str]:
platforms: set[str] = set()
for test_file in TESTS_DIR.glob("*/test.*.yaml"):
# test.<platform>.yaml -> platform is the middle dotted part
parts = test_file.name.split(".")
if len(parts) == 3:
platforms.add(parts[1])
return platforms
def _load_components(
platform: str, parse_errors: list[str]
) -> Iterator[tuple[str, object]]:
"""Yield (component, prefixed config) for each component testing this platform.
Each body is prepared with ``prepare_component_body`` (the same helper the
merge uses: it expands component-specific package includes and prefixes
substitutions), so the comparison sees what the build merges. Fixtures that
fail to parse are recorded in ``parse_errors`` so the run can fail rather than
silently skip them.
"""
for comp_dir in sorted(TESTS_DIR.iterdir()):
test_file = comp_dir / f"test.{platform}.yaml"
if not comp_dir.is_dir() or not test_file.exists():
continue
try:
data = load_yaml_file(test_file)
except EsphomeError as err:
parse_errors.append(str(test_file))
print(f"ERROR: could not parse {test_file}: {err}", file=sys.stderr)
continue
yield comp_dir.name, prepare_component_body(data, comp_dir.name, comp_dir)
@dataclass
class ScanResult:
"""Outcome of a scan. A caller cannot observe a clean result while files were
skipped or nothing was scanned -- all three fields are reported together."""
conflicts: list[str] = field(default_factory=list)
parse_errors: list[str] = field(default_factory=list)
components_scanned: int = 0
def scan() -> ScanResult:
"""Scan every component's base test fixture and report cross-component id conflicts.
Only base ``test.<platform>.yaml`` fixtures are scanned because only those are
combined by ``merge_component_configs`` in grouped CI builds; variant
(``test-*.yaml``) fixtures are built individually and never cross-merged.
"""
result = ScanResult()
for platform in sorted(_discover_platforms()):
# (dict_path, id) -> {component: prefixed_item}
groups: dict[tuple[tuple[str, ...], object], dict[str, object]] = defaultdict(
dict
)
for component, data in _load_components(platform, result.parse_errors):
result.components_scanned += 1
collected: dict[tuple[tuple[str, ...], object], object] = {}
_collect_ids(data, (), collected)
for key, item in collected.items():
groups[key][component] = item
for (path, id_), by_component in sorted(
groups.items(), key=lambda kv: (kv[0][0], str(kv[0][1]))
):
if len(by_component) < 2:
continue
# Delegate the decision to the merge's own deduplication so this guard
# can never disagree with what the build does.
try:
deduplicate_by_id({path[-1]: list(by_component.values())})
except ValueError:
result.conflicts.append(
f"[{platform}] id '{id_}' under '{'.'.join(path)}' is defined "
f"differently by: {', '.join(sorted(by_component))}"
)
return result
def main() -> int:
result = scan()
if result.conflicts:
print("Conflicting test component ids found:\n")
for line in result.conflicts:
print(f" - {line}")
print(
"\nGive each component a unique id (e.g. '<component>_<id>'), or add the "
"id to INTENTIONALLY_SHARED_IDS in script/merge_component_configs.py if "
"it is a deliberately shared singleton."
)
if result.parse_errors:
# A fixture we could not parse was never scanned, so the run is not a
# clean pass even if no conflicts were found among the rest.
print(
f"\n{len(result.parse_errors)} test fixture(s) could not be parsed and "
"were not checked:"
)
for path in result.parse_errors:
print(f" - {path}")
if result.components_scanned == 0:
# A scan that covered nothing is a false green -- the whole point of the
# guard is defeated. Fail loudly (wrong working directory or layout change).
print(
f"\nERROR: scanned 0 component test fixtures under {TESTS_DIR}; "
"the guard covered nothing.",
file=sys.stderr,
)
if result.conflicts or result.parse_errors or result.components_scanned == 0:
return 1
print(
f"No conflicting test component ids found "
f"({result.components_scanned} fixtures scanned)."
)
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -161,18 +161,46 @@ def prefix_substitutions_in_dict(
return data
# (section, id) pairs that several components intentionally share. ESPHome
# treats these as a single instance when merged, so duplicates with differing
# content are expected and must not be flagged as accidental collisions. Keyed on
# the section as well as the id so a generic name (e.g. `ldo_id`) is only exempt
# in its intended section -- an accidental collision on the same name elsewhere
# is still caught.
INTENTIONALLY_SHARED_IDS = frozenset(
{
# Several components each declare an `sntp_time` clock; ESPHome merges
# them into one time source.
("time", "sntp_time"),
# esp_ldo and mipi_dsi both configure the channel-3 internal LDO on the
# ESP32-P4; only one LDO per channel may exist, so the shared id lets the
# merge collapse them into a single LDO.
("esp_ldo", "ldo_id"),
}
)
def deduplicate_by_id(data: dict) -> dict:
"""Deduplicate list items with the same ID.
Keeps only the first occurrence of each ID. If items with the same ID
are identical, this silently deduplicates. If they differ, the first
one is kept (ESPHome's validation will catch if this causes issues).
Identical items sharing an ID (e.g. a shared bus from a common package pulled
in by several components) are collapsed to the first occurrence. Two items
that share an ID but differ in content are a real conflict: when merged, the
first silently wins and the others are dropped, which can make a
cross-reference resolve to an incompatible entity. Rather than defer that to
downstream validation (where it surfaces as a confusing, order-dependent
failure in an unrelated build), raise immediately so the offending ID is
named. Ids in ``INTENTIONALLY_SHARED_IDS`` are deliberately shared singletons
and keep their collapse behaviour.
Args:
data: Parsed config dictionary
Returns:
Config with deduplicated lists
Raises:
ValueError: If two items share an ID but have different content.
"""
if not isinstance(data, dict):
return data
@@ -181,16 +209,25 @@ def deduplicate_by_id(data: dict) -> dict:
for key, value in data.items():
if isinstance(value, list):
# Check for items with 'id' field
seen_ids = set()
seen_items: dict[str, Any] = {}
deduped_list = []
for item in value:
if isinstance(item, dict) and "id" in item:
item_id = item["id"]
if item_id not in seen_ids:
seen_ids.add(item_id)
if item_id not in seen_items:
seen_items[item_id] = item
deduped_list.append(item)
# else: skip duplicate ID (keep first occurrence)
elif (key, item_id) in INTENTIONALLY_SHARED_IDS:
# Deliberately shared singleton -> keep first occurrence.
pass
elif item != seen_items[item_id]:
raise ValueError(
f"Conflicting definitions for id '{item_id}' under "
f"'{key}' when merging test configs; give each "
f"component a unique id"
)
# else: identical duplicate (e.g. shared bus package) -> skip
else:
# No ID, just add it
deduped_list.append(item)
@@ -205,6 +242,55 @@ def deduplicate_by_id(data: dict) -> dict:
return result
def prepare_component_body(comp_data: dict, comp_name: str, comp_dir: Path) -> dict:
"""Return a component's test body as it enters the merge.
Expands component-specific package includes inline (common bus packages are
left for the merge to re-add once), applies ESPHome's top-level-substitutions
-override-package-substitutions rule, then prefixes every substitution
reference with the component name. Shared by ``merge_component_configs`` and
the duplicate-id guard (``script/ci_check_duplicate_test_ids.py``) so the
guard compares exactly what the build merges.
"""
# $component_dir resolves to the component's absolute path.
comp_abs_dir = str(comp_dir.absolute())
# Top-level substitutions override package substitutions, so capture them
# before expanding packages can introduce their own.
top_level_subs = (
comp_data["substitutions"].copy()
if isinstance(comp_data.get("substitutions"), dict)
else {}
)
packages_value = comp_data.get("packages")
if isinstance(packages_value, dict):
common_bus_packages = get_common_bus_packages()
for pkg_name, pkg_value in list(packages_value.items()):
if pkg_name in common_bus_packages:
continue
if isinstance(pkg_value, yaml_util.IncludeFile):
pkg_value = pkg_value.load()
if isinstance(pkg_value, dict):
comp_data = merge_config(comp_data, pkg_value)
elif isinstance(packages_value, list):
for pkg_value in packages_value:
if isinstance(pkg_value, yaml_util.IncludeFile):
pkg_value = pkg_value.load()
if isinstance(pkg_value, dict):
comp_data = merge_config(comp_data, pkg_value)
# Common bus packages are re-added once by the caller; drop them here.
comp_data.pop("packages", None)
subs = comp_data.get("substitutions") or {}
subs.update(top_level_subs)
prefixed_subs = {f"{comp_name}_{name}": value for name, value in subs.items()}
prefixed_subs[f"{comp_name}_component_dir"] = comp_abs_dir
comp_data["substitutions"] = prefixed_subs
return prefix_substitutions_in_dict(comp_data, comp_name)
def merge_component_configs(
component_names: list[str],
platform: str,
@@ -266,67 +352,9 @@ def merge_component_configs(
# New package type - add it
all_packages[pkg_name] = pkg_config
# Handle $component_dir by replacing with absolute path
# This allows components that use local file references to be grouped
comp_abs_dir = str(comp_dir.absolute())
# Save top-level substitutions BEFORE expanding packages
# In ESPHome, top-level substitutions override package substitutions
top_level_subs = (
comp_data["substitutions"].copy()
if "substitutions" in comp_data and comp_data["substitutions"] is not None
else {}
)
# Expand packages - but we'll restore substitution priority after
if "packages" in comp_data:
packages_value = comp_data["packages"]
if isinstance(packages_value, dict):
# Dict format - check each package
common_bus_packages = get_common_bus_packages()
for pkg_name, pkg_value in list(packages_value.items()):
if pkg_name in common_bus_packages:
continue
# Resolve deferred !include files before checking type
if isinstance(pkg_value, yaml_util.IncludeFile):
pkg_value = pkg_value.load()
if not isinstance(pkg_value, dict):
continue
# Component-specific package - expand its content into top level
comp_data = merge_config(comp_data, pkg_value)
elif isinstance(packages_value, list):
# List format - expand all package includes
for pkg_value in packages_value:
# Resolve deferred !include files before checking type
if isinstance(pkg_value, yaml_util.IncludeFile):
pkg_value = pkg_value.load()
if not isinstance(pkg_value, dict):
continue
comp_data = merge_config(comp_data, pkg_value)
# Remove all packages (common will be re-added at the end)
del comp_data["packages"]
# Restore top-level substitution priority
# Top-level substitutions override any from packages
if "substitutions" not in comp_data or comp_data["substitutions"] is None:
comp_data["substitutions"] = {}
# Merge: package subs as base, top-level subs override
comp_data["substitutions"].update(top_level_subs)
# Now prefix the final merged substitutions
comp_data["substitutions"] = {
f"{comp_name}_{sub_name}": sub_value
for sub_name, sub_value in comp_data["substitutions"].items()
}
# Add component_dir substitution with absolute path for this component
comp_data["substitutions"][f"{comp_name}_component_dir"] = comp_abs_dir
# Prefix substitution references throughout the config
comp_data = prefix_substitutions_in_dict(comp_data, comp_name)
# Expand component-specific packages and prefix substitutions, exactly as
# the duplicate-id guard does, so both see the same body.
comp_data = prepare_component_body(comp_data, comp_name, comp_dir)
# Use ESPHome's merge_config to merge this component into the result
# merge_config handles list merging with ID-based deduplication automatically