[ci] Replace clang-tidy hash with direct config-file diff check (#17019)

This commit is contained in:
Jonathan Swoboda
2026-06-17 21:12:39 -04:00
committed by GitHub
parent 34844da668
commit 7cb6cf2f2a
9 changed files with 124 additions and 674 deletions

View File

@@ -276,7 +276,7 @@ def lint_newline(fname, line, col, content):
return "File contains Windows newline. Please set your editor to Unix newline mode."
@lint_content_check(exclude=["*.svg", ".clang-tidy.hash"])
@lint_content_check(exclude=["*.svg"])
def lint_end_newline(fname, content):
if content and not content.endswith("\n"):
return "File does not end with a newline, please add an empty line at the end of the file."

208
script/clang_tidy_hash.py Executable file → Normal file
View File

@@ -1,66 +1,32 @@
#!/usr/bin/env python3
"""Calculate and manage hash for clang-tidy configuration."""
"""Files that affect clang-tidy results, and a content hash over them.
``CLANG_TIDY_GLOBAL_FILES`` (plus ``SDKCONFIG_DEFAULTS_PREFIX``) is the single
source of truth for which files influence clang-tidy output. A change to any of
them can surface warnings in source files a PR didn't touch, so:
* ``script/determine-jobs.py`` runs a full clang-tidy scan when one changes, and
* ``calculate_clang_tidy_hash()`` folds them into the idedata cache key used by
``script/helpers.py`` (a content hash, unlike an mtime check, stays correct
across git checkouts).
"""
from __future__ import annotations
import argparse
import hashlib
from pathlib import Path
import re
import sys
# Add the script directory to path to import helpers
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir))
# Root-relative paths whose contents affect clang-tidy results.
# Each entry is joined onto the repo root when hashing and compared verbatim
# against changed-file paths when deciding on a full scan, so entries must be
# spelled exactly as those paths are reported (presumably forward-slash,
# root-relative - confirm against helpers.changed_files).
CLANG_TIDY_GLOBAL_FILES = (
    ".clang-tidy",  # the clang-tidy configuration itself
    "platformio.ini",
    "requirements_dev.txt",  # carries the clang-tidy version requirement
    "esphome/idf_component.yml",  # managed deps drive the ESP-IDF include set
)
def read_file_lines(path: Path) -> list[str]:
    """Read a text file and return its lines.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    locale encoding, so the result does not depend on the machine the script
    runs on.

    Args:
        path: File to read.

    Returns:
        The lines of the file in order, each keeping its trailing newline.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with path.open(encoding="utf-8") as f:
        return f.readlines()
def parse_requirement_line(line: str) -> tuple[str, str] | None:
"""Parse a requirement line and return (package, original_line) or None.
Handles formats like:
- package==1.2.3
- package==1.2.3 # comment
- package>=1.2.3,<2.0.0
"""
original_line = line.strip()
# Extract the part before any comment for parsing
parse_line = line
if "#" in parse_line:
parse_line = parse_line[: parse_line.index("#")]
parse_line = parse_line.strip()
if not parse_line:
return None
# Use regex to extract package name
# This matches package names followed by version operators
match = re.match(r"^([a-zA-Z0-9_-]+)(==|>=|<=|>|<|!=|~=)(.+)$", parse_line)
if match:
return (match.group(1), original_line) # Return package name and original line
return None
def get_clang_tidy_version_from_requirements(repo_root: Path | None = None) -> str:
    """Return the clang-tidy requirement line from requirements_dev.txt.

    Args:
        repo_root: Repository root, passed through ``_ensure_repo_root``
            before use.

    Returns:
        The stripped original line (inline comment included) of the first
        requirement whose package name is ``clang-tidy``, or the literal
        string ``"clang-tidy version not found"`` when no such line exists.
    """
    root = _ensure_repo_root(repo_root)
    parsed_entries = (
        parse_requirement_line(raw)
        for raw in read_file_lines(root / "requirements_dev.txt")
    )
    # entry[1] is the original line, so comments on the pin are preserved.
    return next(
        (
            entry[1]
            for entry in parsed_entries
            if entry and entry[0] == "clang-tidy"
        ),
        "clang-tidy version not found",
    )
# sdkconfig.defaults and per-target sdkconfig.defaults.<target> files flip the
# CONFIG flags that decide which variant code paths clang-tidy sees. Matched by
# this prefix at the repo root.
SDKCONFIG_DEFAULTS_PREFIX = "sdkconfig.defaults"
def read_file_bytes(path: Path) -> bytes:
@@ -80,130 +46,20 @@ def _ensure_repo_root(repo_root: Path | None) -> Path:
def calculate_clang_tidy_hash(repo_root: Path | None = None) -> str:
    """Calculate a hash of the files that affect clang-tidy results.

    Every input is hashed exactly once: the files named in
    ``CLANG_TIDY_GLOBAL_FILES`` (in tuple order), followed by each root-level
    ``sdkconfig.defaults*`` file in sorted order.

    Args:
        repo_root: Repository root, passed through ``_ensure_repo_root``
            before use.

    Returns:
        Hex-encoded SHA-256 digest over the inputs described above.
    """
    repo_root = _ensure_repo_root(repo_root)
    hasher = hashlib.sha256()

    # Global config files. A file that is absent simply contributes nothing.
    # requirements_dev.txt is hashed in full, which covers the clang-tidy
    # version line without parsing it separately.
    for name in CLANG_TIDY_GLOBAL_FILES:
        path = repo_root / name
        if path.exists():
            hasher.update(read_file_bytes(path))

    # Hash each sdkconfig.defaults* file. Include the filename so adding or
    # renaming a per-target variant is detected, not just content edits.
    for path in sorted(repo_root.glob(f"{SDKCONFIG_DEFAULTS_PREFIX}*")):
        hasher.update(path.name.encode())
        hasher.update(read_file_bytes(path))

    return hasher.hexdigest()
def read_stored_hash(repo_root: Path | None = None) -> str | None:
    """Return the hash recorded in ``.clang-tidy.hash``, if any.

    Args:
        repo_root: Repository root, passed through ``_ensure_repo_root``
            before use.

    Returns:
        The first line of the hash file with surrounding whitespace removed,
        or ``None`` when the file is missing or has no lines.
    """
    stored = _ensure_repo_root(repo_root) / ".clang-tidy.hash"
    if not stored.exists():
        return None
    content = read_file_lines(stored)
    if not content:
        return None
    return content[0].strip()
def write_file_content(path: Path, content: str) -> None:
    """Write text to a file, replacing any existing content.

    The encoding is pinned to UTF-8 and newline translation is disabled so
    the bytes on disk are the same on every platform; in default text mode
    a ``"\\n"`` would be written as ``"\\r\\n"`` on Windows.

    Args:
        path: Destination file; created if missing, truncated otherwise.
        content: Text to write verbatim.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    with path.open("w", encoding="utf-8", newline="\n") as f:
        f.write(content)
def write_hash(hash_value: str, repo_root: Path | None = None) -> None:
    """Store ``hash_value`` in the repository's ``.clang-tidy.hash`` file.

    Args:
        hash_value: Hash to record; surrounding whitespace is dropped.
        repo_root: Repository root, passed through ``_ensure_repo_root``
            before use.
    """
    target = _ensure_repo_root(repo_root) / ".clang-tidy.hash"
    # Normalise to exactly one trailing newline for consistent formatting.
    normalized = hash_value.strip() + "\n"
    write_file_content(target, normalized)
def main() -> None:
    """Command-line entry point for managing the clang-tidy configuration hash.

    Modes, checked in this order (the first flag that is set wins):

    * ``--check``: exit 0 when a full scan is needed (the computed hash differs
      from the stored one, or ``.clang-tidy.hash`` is among the changed
      files), otherwise exit 1.
    * ``--verify``: exit 1 with a diagnostic when the stored hash is stale.
    * ``--update``: rewrite the stored hash unconditionally.
    * ``--update-if-changed``: rewrite the stored hash only when stale; always
      exits 0.
    * no flag: print the current hash, the stored hash and whether they match.
    """
    parser = argparse.ArgumentParser(description="Manage clang-tidy configuration hash")
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check if full scan needed (exit 0 if needed)",
    )
    parser.add_argument("--update", action="store_true", help="Update the hash file")
    parser.add_argument(
        "--update-if-changed",
        action="store_true",
        help="Update hash only if configuration changed (for pre-commit)",
    )
    parser.add_argument(
        "--verify", action="store_true", help="Verify hash matches (for CI)"
    )
    options = parser.parse_args()

    computed = calculate_clang_tidy_hash()
    recorded = read_stored_hash()
    matches = computed == recorded

    if options.check:
        # Used in CI to decide whether a full clang-tidy scan is needed.
        # Lazy import to avoid requiring dependencies that aren't needed for other modes
        from helpers import changed_files  # noqa: E402

        hash_file_touched = ".clang-tidy.hash" in changed_files()
        # Exit 0 if full scan needed
        sys.exit(1 if matches and not hash_file_touched else 0)
        return

    if options.verify:
        # Used in pre-commit and CI to ensure the stored hash was refreshed.
        if not matches:
            print("ERROR: Clang-tidy configuration has changed but hash not updated!")
            print(f"Expected: {computed}")
            print(f"Found: {recorded}")
            print("\nPlease run: script/clang_tidy_hash.py --update")
            sys.exit(1)
        print("Hash verification passed")
        return

    if options.update:
        write_hash(computed)
        print(f"Hash updated: {computed}")
        return

    if options.update_if_changed:
        if matches:
            print("Clang-tidy hash unchanged")
        else:
            write_hash(computed)
            print(f"Clang-tidy hash updated: {computed}")
        # Exit 0 in both cases so pre-commit can stage the file
        sys.exit(0)
        return

    print(f"Current hash: {computed}")
    print(f"Stored hash: {recorded}")
    print(f"Match: {matches}")


if __name__ == "__main__":
    main()

View File

@@ -55,10 +55,10 @@ from functools import cache
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any
from clang_tidy_hash import CLANG_TIDY_GLOBAL_FILES, SDKCONFIG_DEFAULTS_PREFIX
from helpers import (
CPP_FILE_EXTENSIONS,
ESPHOME_TESTS_COMPONENTS_PATH,
@@ -280,23 +280,22 @@ def determine_integration_tests(branch: str | None = None) -> tuple[bool, list[s
@cache
def _is_clang_tidy_full_scan(branch: str | None = None) -> bool:
    """Check if a clang-tidy-relevant config file changed (requires full scan).

    A change to a file that affects clang-tidy globally can surface warnings in
    source files the PR didn't touch, so the entire codebase must be re-scanned.

    The result is memoised per ``branch`` value by ``functools.cache``.

    Args:
        branch: Branch to compare against; forwarded to ``changed_files``.

    Returns:
        True if full scan is needed, False otherwise.
    """
    for file in changed_files(branch):
        if file in CLANG_TIDY_GLOBAL_FILES:
            return True
        # Root-level sdkconfig.defaults and per-target sdkconfig.defaults.<target>;
        # the "/" test excludes same-named files in subdirectories.
        if "/" not in file and file.startswith(SDKCONFIG_DEFAULTS_PREFIX):
            return True
    return False
def should_run_clang_tidy(branch: str | None = None) -> bool:
@@ -307,13 +306,12 @@ def should_run_clang_tidy(branch: str | None = None) -> bool:
Clang-tidy will run when ANY of the following conditions are met:
1. Clang-tidy configuration changed
- The hash of .clang-tidy configuration file has changed
- The hash includes the .clang-tidy file, clang-tidy version from requirements_dev.txt,
and relevant platformio.ini sections
- When configuration changes, a full scan is needed to ensure all code complies
with the new rules
- Detected by script/clang_tidy_hash.py --check returning exit code 0
1. A clang-tidy-relevant config file changed (full scan needed)
- Any file in CLANG_TIDY_GLOBAL_FILES (.clang-tidy, platformio.ini,
requirements_dev.txt, esphome/idf_component.yml) or a root-level
sdkconfig.defaults* file
- These affect clang-tidy results globally, so all code must be re-checked
to ensure it still complies
2. Any C++ source files changed
- Any file with C++ extensions: .cpp, .h, .hpp, .cc, .cxx, .c, .tcc
@@ -321,27 +319,14 @@ def should_run_clang_tidy(branch: str | None = None) -> bool:
- This ensures all C++ code is checked, including tests, examples, etc.
- Examples: esphome/core/component.cpp, tests/custom/my_component.h
3. The .clang-tidy.hash file itself changed
- This indicates the configuration has been updated and clang-tidy should run
- Ensures that PRs updating the clang-tidy configuration are properly validated
If the hash check fails for any reason, clang-tidy runs as a safety measure to ensure
code quality is maintained.
Args:
branch: Branch to compare against. If None, uses default.
Returns:
True if clang-tidy should run, False otherwise.
"""
# First check if clang-tidy configuration changed (full scan needed)
if _is_clang_tidy_full_scan():
return True
# Check if .clang-tidy.hash file itself was changed
# This handles the case where the hash was properly updated in the PR
files = changed_files(branch)
if ".clang-tidy.hash" in files:
# First check if a clang-tidy-relevant config file changed (full scan needed)
if _is_clang_tidy_full_scan(branch):
return True
return _any_changed_file_endswith(branch, CPP_FILE_EXTENSIONS)
@@ -1276,9 +1261,9 @@ def main() -> None:
# Determine clang-tidy mode based on actual files that will be checked
is_full_scan = False
if run_clang_tidy:
# Full scan needed if: hash changed OR core files changed
# (is_core_change is forced True under --force-all)
is_full_scan = _is_clang_tidy_full_scan() or is_core_change
# Full scan needed if: a clang-tidy-relevant config file changed OR
# core files changed (is_core_change is forced True under --force-all)
is_full_scan = _is_clang_tidy_full_scan(args.branch) or is_core_change
if is_full_scan:
# Full scan checks all files - always use split mode for efficiency