[espidf] Fix idedata generation on Windows (#16894)

This commit is contained in:
Jonathan Swoboda
2026-06-09 21:09:50 -04:00
committed by GitHub
parent 4f62bb7171
commit 4963ddcb95
2 changed files with 127 additions and 9 deletions

View File

@@ -29,6 +29,54 @@ _INPUT_FILE_SUFFIXES = (*_CXX_SUFFIXES, ".c", ".o", ".S", ".s")
_ESPHOME_SRC_MARKER = "/src/esphome/"
def _is_esphome_src(file: str) -> bool:
    """Return True when ``file`` names a C++ source under ``/src/esphome/``.

    ``compile_commands.json`` ``file`` paths use the OS-native separator, so
    on Windows they contain backslashes. The marker is therefore matched
    against a copy of ``file`` with every backslash turned into ``/``;
    without that no source would match there and the build-include union
    would come out empty. The suffix test runs on ``file`` as given.
    """
    # Cheap rejection first: anything without a C++ suffix is never a match.
    if not file.endswith(_CXX_SUFFIXES):
        return False
    forward_slashed = file.replace("\\", "/")
    return _ESPHOME_SRC_MARKER in forward_slashed
def _split_command(command: str) -> list[str]:
r"""Tokenize a compile_commands.json / response-file command string.
On Windows, tokenize per Windows ``argv`` rules via ``CommandLineToArgvW``.
ESP-IDF's compile_commands.json there mixes two backslash conventions in one
string: literal path separators in the compiler path (``C:\Users\...g++.exe``,
no quote follows) and shell quote-escaping in -D defines (``-DVER=\"1.2.3\"``).
Only the real Windows parser — where a backslash escapes solely a following
quote — handles both, and it is the exact tokenizer the compiler is launched
with. ``shlex`` cannot: POSIX mode eats the path separators, and disabling
its escape mangles the defines.
"""
if os.name != "nt":
return shlex.split(command)
import ctypes
from ctypes import wintypes
# CommandLineToArgvW("") returns the current process name, not []; guard it
# so an empty response file tokenizes the same as it would via shlex.
if not command.strip():
return []
CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW
CommandLineToArgvW.argtypes = [wintypes.LPCWSTR, ctypes.POINTER(ctypes.c_int)]
CommandLineToArgvW.restype = ctypes.POINTER(wintypes.LPWSTR)
argc = ctypes.c_int()
argv = CommandLineToArgvW(command, ctypes.byref(argc))
if not argv: # pragma: no cover
raise ctypes.WinError()
try:
return [argv[i] for i in range(argc.value)]
finally:
ctypes.windll.kernel32.LocalFree(argv)
def _expand_response_files(tokens: list[str], directory: Path) -> list[str]:
"""Inline any ``@response-file`` arguments (paths relative to ``directory``).
@@ -45,7 +93,7 @@ def _expand_response_files(tokens: list[str], directory: Path) -> list[str]:
try:
out.extend(
_expand_response_files(
shlex.split(rf.read_text(encoding="utf-8")), directory
_split_command(rf.read_text(encoding="utf-8")), directory
)
)
continue
@@ -64,8 +112,7 @@ def _pick_entry(entries: list[dict]) -> dict:
them yields the cxx_path / cxx_flags / defines we need.
"""
for entry in entries:
f = entry["file"]
if _ESPHOME_SRC_MARKER in f and f.endswith(_CXX_SUFFIXES):
if _is_esphome_src(entry["file"]):
return entry
for entry in entries:
if entry["file"].endswith(_CXX_SUFFIXES):
@@ -76,18 +123,22 @@ def _pick_entry(entries: list[dict]) -> dict:
def _parse_entry(entry: dict) -> tuple[str, list[str], list[str], list[str]]:
"""Parse one compile_commands entry -> (cxx_path, defines, includes, cxx_flags)."""
directory = Path(entry["directory"])
tokens = _expand_response_files(shlex.split(entry["command"]), directory)
tokens = _expand_response_files(_split_command(entry["command"]), directory)
def _include(raw: str) -> str:
# Include paths in compile_commands are interpreted relative to the
# entry's ``directory`` (e.g. build-local ``-Iconfig``); resolve them
# so the cached idedata is usable regardless of the consumer's cwd.
# Emit forward slashes (``normpath`` yields ``\`` on Windows) so the
# paths match the absolute, already-forward-slash entries in the JSON.
raw = raw.strip()
if raw and not Path(raw).is_absolute():
raw = os.path.normpath(directory / raw)
return raw
return raw.replace("\\", "/")
cxx_path = tokens[0]
# token0 is the compiler path; the rest of the command already uses forward
# slashes on Windows, so normalize it too for a consistent idedata file.
cxx_path = tokens[0].replace("\\", "/")
defines: list[str] = []
includes: list[str] = []
cxx_flags: list[str] = []
@@ -161,8 +212,7 @@ def idedata_from_build(compile_commands: Path) -> dict:
build_includes: dict[str, None] = {}
for entry in entries:
f = entry["file"]
if _ESPHOME_SRC_MARKER not in f or not f.endswith(_CXX_SUFFIXES):
if not _is_esphome_src(entry["file"]):
continue
for inc in _parse_entry(entry)[2]:
build_includes.setdefault(inc, None)

View File

@@ -72,7 +72,9 @@ def test_parse_entry_resolves_relative_includes() -> None:
_, _, includes, _ = idedata._parse_entry(entry)
def resolved(rel: str) -> str:
return os.path.normpath(Path(directory) / rel)
# _parse_entry emits forward slashes for consistency (normpath would
# yield backslashes on Windows).
return os.path.normpath(Path(directory) / rel).replace("\\", "/")
assert resolved("config") in includes
assert resolved("../shared") in includes # ../ normalized away
@@ -124,6 +126,29 @@ def test_pick_entry_prefers_esphome_tu() -> None:
assert idedata._pick_entry(entries)["file"].endswith("app.cpp")
def test_pick_entry_falls_back_to_any_cxx_tu() -> None:
    """Without any ``/src/esphome/`` TU, ``_pick_entry`` settles on a C++ entry."""
    # A plain C file comes first; the only C++ candidate is listed after it.
    c_only = _entry("/b", "/b/managed_components/foo/foo.c", "gcc -c foo.c")
    cxx_fallback = _entry("/b", "/b/components/x/x.cpp", "g++ -c x.cpp")

    chosen = idedata._pick_entry([c_only, cxx_fallback])

    assert chosen["file"].endswith("x.cpp")
def test_is_esphome_src_handles_backslash_paths() -> None:
    r"""``_is_esphome_src`` accepts ``\src\esphome\`` as well as ``/src/esphome/``.

    compile_commands ``file`` entries use the OS-native separator; a marker
    that only matched forward slashes would match no source on Windows and
    leave the build-include union silently empty.
    """
    accepted = (
        r"C:\b\src\esphome\core\app.cpp",
        "/b/src/esphome/core/app.cpp",
    )
    # non-esphome and non-C++ still rejected regardless of separator
    rejected = (
        r"C:\b\managed_components\x\x.cpp",
        r"C:\b\src\esphome\core\app.h",
    )

    for path in accepted:
        assert idedata._is_esphome_src(path)
    for path in rejected:
        assert not idedata._is_esphome_src(path)
def test_idedata_from_build(tmp_path: Path) -> None:
"""Full transform: representative entry + include union + toolchain dirs."""
compile_commands = tmp_path / "compile_commands.json"
@@ -194,3 +219,46 @@ def test_get_toolchain_includes_raises_when_no_dirs_found() -> None:
pytest.raises(RuntimeError, match="builtin include dirs"),
):
idedata._get_toolchain_includes("/some/compiler")
# ESP-IDF's compile_commands.json on Windows mixes literal backslash path
# separators in the compiler path with shell ``\"`` quote-escaping in defines,
# which only the real Windows argv parser handles. These exercise that path.
@pytest.mark.skipif(os.name != "nt", reason="Windows argv tokenization")
def test_split_command_preserves_paths_and_unescapes_quotes() -> None:
    r"""Path backslashes are kept; ``\"`` around a define value becomes ``"``."""
    compiler = r"C:\esp\bin\riscv32-esp-elf-g++.exe"
    command = compiler + r" -DVER=\"1.2.3\" -IC:/inc/a -c x.cpp"

    tokens = idedata._split_command(command)

    # The compiler path must come through untouched as the first token.
    assert tokens[0] == compiler
    for expected in ('-DVER="1.2.3"', "-IC:/inc/a"):
        assert expected in tokens
@pytest.mark.skipif(os.name != "nt", reason="Windows argv tokenization")
def test_split_command_empty_returns_empty() -> None:
    """Empty and whitespace-only commands both tokenize to ``[]``.

    An empty response file is one such input. ``CommandLineToArgvW("")``
    would otherwise hand back the current process name rather than an empty
    list, so this pins the guard against that.
    """
    for blank in ("", " "):
        assert idedata._split_command(blank) == []
@pytest.mark.skipif(os.name != "nt", reason="Windows argv tokenization")
def test_parse_entry_normalizes_windows_cxx_path() -> None:
    """The compiler path comes back forward-slashed and the define unescaped."""
    build_dir = r"C:\b"
    source = r"C:\b\src\esphome\x.cpp"
    command = r"C:\esp\bin\g++.exe -DVER=\"1.2.3\" -IC:/inc/a -c x.cpp"

    parsed = idedata._parse_entry(_entry(build_dir, source, command))
    cxx_path, defines, includes, _ = parsed

    # Compiler path: same location, separators rewritten, no backslash left.
    assert cxx_path == "C:/esp/bin/g++.exe"
    assert "\\" not in cxx_path
    # The define keeps its quoted value; the include keeps its forward slashes.
    assert 'VER="1.2.3"' in defines
    assert "C:/inc/a" in includes