Compare commits

...

5 Commits

Author SHA1 Message Date
J. Nick Koston
02f9a78a86 [bk72xx] Address Copilot review: fix UB in scan_backtrace + snprintf truncation
- scan_backtrace: do bounds math in uintptr_t space and only cast to
  pointer at dereference. Comparing pointers from unrelated objects
  (the SP value vs the linker RAM-end symbol) is technically UB in C++
  even though it works on flat-address embedded targets.
- crash_handler_log: clamp the snprintf return value before using it
  as an offset. snprintf returns < 0 on error or >= size on truncation,
  and feeding that straight back into pointer arithmetic is UB.
2026-04-30 13:15:02 -05:00
J. Nick Koston
b0c618ed27 [bk72xx] Emit USE_BK72XX_CRASH_HANDLER via cg.add_define
The previous gate lived only in core/defines.h, which is loaded for
static analysis / IDE support but not for actual builds. As a result
the wrap symbols (__wrap_bk_trap_udef/pabt/dabt) were preprocessed out
of crash_handler.cpp at compile time, leaving the linker without a
target for the SDK's wrapped trap calls. Set the define from
libretiny/__init__.py (where the platform-specific codegen lives —
bk72xx/__init__.py is auto-generated and not the right home).
2026-04-30 13:03:25 -05:00
J. Nick Koston
1e3c104cbc [bk72xx] Read RAM/flash bounds from linker symbols, not constants
Addresses review feedback ("unsafe assumptions about memory layout",
"its a heuristic"). The patch_bk72xx_noinit.py extra_script now also
emits PROVIDE(_esphome_{ram,flash}_{start,end}) tied directly to the
linker's MEMORY definition. crash_handler.cpp consumes those symbols
instead of hardcoding bounds, so it tracks the actual variant + board
layout (BK7231N=192KB vs others=256KB, board-specific BKOFFSET_APP /
BKRBL_SIZE_APP) without any chip-aware constants of its own.

Mirrors the linker-symbol pattern already used in
components/esp8266/crash_handler.cpp for the IROM bounds.
2026-04-30 12:57:18 -05:00
J. Nick Koston
5bd5cf1dab [bk72xx] Use USE_LIBRETINY_VARIANT_BK7231N for variant-specific RAM size
Mirrors the split in LibreTiny's lt_mem.c: BK7231N has 192KB RAM, every
other BK72XX variant has 256KB. The previous hardcoded 0x00440000 was
correct for everything except BK7231N, where the stack-scan upper bound
sat 64KB past the chip's physical RAM end.
2026-04-30 12:55:38 -05:00
J. Nick Koston
8fc2e1e542 [bk72xx] Add crash-on-previous-boot detection
Brings BK72XX to parity with esp32/esp8266/rp2040: register snapshot +
stack-scanned backtrace are captured during the SDK's exception traps
(undefined instruction / prefetch abort / data abort), persisted across
the watchdog reset in a .noinit RAM region, then logged on the next
boot via the logger and API log subscription paths.

The capture path uses linker --wrap on bk_trap_udef/pabt/dabt — the
trap symbols are defined in LibreTiny's intc.c fixup but called from
the closed-source Beken SDK across translation units, so --wrap takes
effect cleanly. The original SDK behavior (UART register dump +
bk_cpu_shutdown → watchdog reset) is preserved by tail-calling the
real handlers after capture.

A small extra_script (patch_bk72xx_noinit.py) injects a .noinit
section between .bss and _empty_ram in the generated linker script
so the crash record is not zeroed by the C runtime startup and the
heap shrinks to fit rather than overlapping it.
2026-04-30 12:50:16 -05:00
8 changed files with 392 additions and 5 deletions

View File

@@ -23,6 +23,9 @@
#ifdef USE_ESP8266_CRASH_HANDLER
#include "esphome/components/esp8266/crash_handler.h"
#endif
#ifdef USE_BK72XX_CRASH_HANDLER
#include "esphome/components/bk72xx/crash_handler.h"
#endif
#include "esphome/core/entity_base.h"
#include "esphome/core/string_ref.h"
@@ -283,6 +286,9 @@ class APIConnection final : public APIServerConnectionBase {
#endif
#ifdef USE_ESP8266_CRASH_HANDLER
esp8266::crash_handler_log();
#endif
#ifdef USE_BK72XX_CRASH_HANDLER
bk72xx::crash_handler_log();
#endif
}
#ifdef USE_API_HOMEASSISTANT_SERVICES

View File

@@ -0,0 +1,235 @@
#ifdef USE_BK72XX
#include "esphome/core/defines.h"
#ifdef USE_BK72XX_CRASH_HANDLER
#include "crash_handler.h"
#include "esphome/core/log.h"
#include <cinttypes>
#include <cstdint>
#include <cstring>
// ARM968E-S register snapshot passed to the SDK trap handlers (matches the
// layout used by bk_show_register() in LibreTiny's intc.c fixup).
extern "C" struct arm_registers {
uint32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10;
uint32_t fp;
uint32_t ip;
uint32_t sp;
uint32_t lr;
uint32_t pc;
uint32_t spsr;
uint32_t cpsr;
};
static constexpr uint32_t CRASH_MAGIC = 0xDEADBEEF;
static constexpr uint32_t CRASH_DATA_VERSION = 1;
static constexpr size_t MAX_BACKTRACE = 16;
// BK72XX exception types (matches the order in which we wrap the SDK traps).
enum CrashException : uint8_t {
CRASH_EXC_NONE = 0,
CRASH_EXC_UNDEF = 1, // Undefined instruction
CRASH_EXC_PABT = 2, // Prefetch abort
CRASH_EXC_DABT = 3, // Data abort
};
// Persistent crash record. Lives in .noinit so it survives the SDK's watchdog
// reset that follows bk_cpu_shutdown(). Validated by magic + version on the
// next boot.
struct CrashData {
uint32_t magic;
uint32_t version;
uint32_t pc;
uint32_t lr;
uint32_t sp;
uint32_t cpsr;
uint8_t exception;
uint8_t backtrace_count;
uint16_t reserved;
uint32_t backtrace[MAX_BACKTRACE];
};
// Placed in .noinit so it survives the watchdog reset that follows
// bk_cpu_shutdown(). The libretiny linker fragment (patch_bk72xx_noinit.py)
// inserts a .noinit section between .bss and _empty_ram so this region is
// not zeroed by the C runtime startup.
static CrashData s_raw_crash_data __attribute__((section(".noinit"), used));
// Whether crash data was found and validated this boot. Lives in .bss
// (zero-initialized at startup) — set by crash_handler_read_and_clear().
static bool s_crash_data_valid = false; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
// RAM and flash bounds come from linker symbols injected by
// libretiny/patch_bk72xx_noinit.py.script (PROVIDE assignments tied to the
// linker's own MEMORY definition). This avoids hardcoding chip-variant
// (BK7231N has 192KB RAM, others 256KB) or board (BKOFFSET_APP /
// BKRBL_SIZE_APP) layout — the linker is the source of truth.
extern "C" {
// NOLINTBEGIN(bugprone-reserved-identifier,readability-identifier-naming,readability-redundant-declaration)
extern char _esphome_ram_start[];
extern char _esphome_ram_end[];
extern char _esphome_flash_start[];
extern char _esphome_flash_end[];
// NOLINTEND(bugprone-reserved-identifier,readability-identifier-naming,readability-redundant-declaration)
}
static inline bool is_code_addr(uint32_t addr) {
// ARM968 instructions are 4-byte aligned; reject obviously bogus values.
if ((addr & 0x3) != 0)
return false;
return addr >= reinterpret_cast<uintptr_t>(_esphome_flash_start) &&
addr < reinterpret_cast<uintptr_t>(_esphome_flash_end);
}
static inline bool is_valid_stack_ptr(uint32_t sp) {
return (sp & 0x3) == 0 && sp >= reinterpret_cast<uintptr_t>(_esphome_ram_start) &&
sp < reinterpret_cast<uintptr_t>(_esphome_ram_end);
}
// Walk the stack starting at `sp` and capture up to `max` code-looking
// 32-bit values into `out`. Skips entries equal to the fault PC so it isn't
// reported twice. Returns the number of entries captured.
//
// Bounds math is done in uintptr_t space; comparing pointers from unrelated
// objects (sp vs the linker-symbol RAM-end) is technically UB in C++ even
// though it works on flat-address embedded targets.
static uint8_t scan_backtrace(uint32_t sp, uint32_t pc, uint32_t *out, uint8_t max) {
if (!is_valid_stack_ptr(sp))
return 0;
// Limit the scan to 256 words (1KB) — covers typical nested call frames
// without dredging up too many stale stack values.
static constexpr uint32_t SCAN_WORDS = 256;
uintptr_t scan_addr = sp;
uintptr_t end_addr = reinterpret_cast<uintptr_t>(_esphome_ram_end);
uintptr_t limit_addr = scan_addr + SCAN_WORDS * sizeof(uint32_t);
if (limit_addr > end_addr)
limit_addr = end_addr;
uint8_t count = 0;
for (; scan_addr < limit_addr && count < max; scan_addr += sizeof(uint32_t)) {
uint32_t val = *reinterpret_cast<const uint32_t *>(scan_addr);
if (is_code_addr(val) && val != pc) {
out[count++] = val;
}
}
return count;
}
namespace esphome::bk72xx {
static const char *const TAG = "bk72xx.crash";
void crash_handler_read_and_clear() {
if (s_raw_crash_data.magic == CRASH_MAGIC && s_raw_crash_data.version == CRASH_DATA_VERSION) {
s_crash_data_valid = true;
if (s_raw_crash_data.backtrace_count > MAX_BACKTRACE)
s_raw_crash_data.backtrace_count = MAX_BACKTRACE;
if (s_raw_crash_data.exception > CRASH_EXC_DABT)
s_raw_crash_data.exception = CRASH_EXC_NONE;
}
// Clear magic so the next boot doesn't replay this crash. s_crash_data_valid
// remains true so additional API clients connecting in this boot session
// still see the log via crash_handler_log().
s_raw_crash_data.magic = 0;
}
bool crash_handler_has_data() { return s_crash_data_valid; }
static const char *get_exception_name(uint8_t exc) {
switch (exc) {
case CRASH_EXC_UNDEF:
return "Undefined instruction";
case CRASH_EXC_PABT:
return "Prefetch abort";
case CRASH_EXC_DABT:
return "Data abort";
default:
return "Unknown";
}
}
// Intentionally uses separate ESP_LOGE calls per line instead of combining into
// one multi-line log message. This ensures each address appears as its own line
// on the serial console, making it possible to see partial output if the device
// crashes again during boot, and allowing the CLI's process_stacktrace to match
// and decode each address individually.
void crash_handler_log() {
if (!s_crash_data_valid)
return;
ESP_LOGE(TAG, "*** CRASH DETECTED ON PREVIOUS BOOT ***");
ESP_LOGE(TAG, " Reason: %s", get_exception_name(s_raw_crash_data.exception));
ESP_LOGE(TAG, " PC: 0x%08" PRIX32 " (fault location)", s_raw_crash_data.pc);
ESP_LOGE(TAG, " LR: 0x%08" PRIX32 " (return address)", s_raw_crash_data.lr);
ESP_LOGE(TAG, " SP: 0x%08" PRIX32, s_raw_crash_data.sp);
ESP_LOGE(TAG, " CPSR:0x%08" PRIX32, s_raw_crash_data.cpsr);
for (uint8_t i = 0; i < s_raw_crash_data.backtrace_count; i++) {
ESP_LOGE(TAG, " BT%d: 0x%08" PRIX32 " (stack scan)", i, s_raw_crash_data.backtrace[i]);
}
// Build addr2line hint with all captured addresses for easy copy-paste.
// snprintf can return < 0 on error or >= size on truncation — clamp pos
// to a valid in-buffer offset before any further append, since we add
// `hint + pos` and OOB pointer arithmetic is UB.
char hint[160];
int written = snprintf(hint, sizeof(hint), "Use: addr2line -pfiaC -e firmware.elf 0x%08" PRIX32 " 0x%08" PRIX32,
s_raw_crash_data.pc, s_raw_crash_data.lr);
size_t pos = (written < 0) ? 0 : (static_cast<size_t>(written) >= sizeof(hint) ? sizeof(hint) - 1 : written);
for (uint8_t i = 0; i < s_raw_crash_data.backtrace_count && pos + 12 < sizeof(hint); i++) {
int n = snprintf(hint + pos, sizeof(hint) - pos, " 0x%08" PRIX32, s_raw_crash_data.backtrace[i]);
if (n < 0)
break;
if (static_cast<size_t>(n) >= sizeof(hint) - pos) {
// Truncated — buffer is full.
break;
}
pos += n;
}
ESP_LOGE(TAG, "%s", hint);
}
} // namespace esphome::bk72xx
// --- BK72XX trap-handler wrappers ---
// LibreTiny's fixup intc.c defines bk_trap_udef/pabt/dabt; the closed-source
// Beken SDK calls them from do_undefined/pabort/dabort after pushing a full
// arm_registers frame. We intercept the call via -Wl,--wrap so the original
// behavior (register dump on UART + bk_cpu_shutdown → watchdog reset) still
// runs after we've snapshotted the registers and a stack-scan backtrace.
extern "C" {
// NOLINTBEGIN(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp,readability-identifier-naming)
extern void __real_bk_trap_udef(struct arm_registers *regs);
extern void __real_bk_trap_pabt(struct arm_registers *regs);
extern void __real_bk_trap_dabt(struct arm_registers *regs);
static void capture_crash(uint8_t exception, struct arm_registers *regs) {
s_raw_crash_data.pc = regs->pc;
s_raw_crash_data.lr = regs->lr;
s_raw_crash_data.sp = regs->sp;
s_raw_crash_data.cpsr = regs->cpsr;
s_raw_crash_data.exception = exception;
s_raw_crash_data.backtrace_count = scan_backtrace(regs->sp, regs->pc, s_raw_crash_data.backtrace, MAX_BACKTRACE);
// Write version + magic last so a partial write is invalidated.
s_raw_crash_data.version = CRASH_DATA_VERSION;
s_raw_crash_data.magic = CRASH_MAGIC;
}
void __wrap_bk_trap_udef(struct arm_registers *regs) {
capture_crash(CRASH_EXC_UNDEF, regs);
__real_bk_trap_udef(regs);
}
void __wrap_bk_trap_pabt(struct arm_registers *regs) {
capture_crash(CRASH_EXC_PABT, regs);
__real_bk_trap_pabt(regs);
}
void __wrap_bk_trap_dabt(struct arm_registers *regs) {
capture_crash(CRASH_EXC_DABT, regs);
__real_bk_trap_dabt(regs);
}
// NOLINTEND(bugprone-reserved-identifier,cert-dcl37-c,cert-dcl51-cpp,readability-identifier-naming)
} // extern "C"
#endif // USE_BK72XX_CRASH_HANDLER
#endif // USE_BK72XX

View File

@@ -0,0 +1,26 @@
#pragma once
#ifdef USE_BK72XX
#include "esphome/core/defines.h"
#ifdef USE_BK72XX_CRASH_HANDLER
namespace esphome::bk72xx {
/// Read crash data from .noinit memory and clear the magic marker so the
/// next boot doesn't re-report it. Crash data is copied into a static and
/// remains accessible via crash_handler_log() / crash_handler_has_data() for
/// the rest of this boot session.
void crash_handler_read_and_clear();
/// Log crash data if a crash was detected on previous boot.
void crash_handler_log();
/// Returns true if crash data was found this boot.
bool crash_handler_has_data();
} // namespace esphome::bk72xx
#endif // USE_BK72XX_CRASH_HANDLER
#endif // USE_BK72XX

View File

@@ -479,6 +479,20 @@ async def component_to_code(config):
# RAM-executable output section and prints a post-link placement summary.
if FAMILY_COMPONENT[config[CONF_FAMILY]] != COMPONENT_BK72XX:
cg.add_platformio_option("extra_scripts", ["pre:patch_linker.py"])
else:
# BK72XX: wrap the SDK's exception trap handlers (defined in LibreTiny's
# intc.c fixup, called from the closed-source SDK's do_undefined/pabort/
# dabort) so we can capture register state + a stack-scan backtrace into
# a .noinit region before the SDK's own register dump + bk_cpu_shutdown
# chain runs. The .noinit region is injected into the generated linker
# script by patch_bk72xx_noinit.py so it survives the watchdog reset.
# bk72xx/__init__.py is auto-generated, so the define is set here
# rather than there. (defines.h has the matching #ifdef USE_BK72XX
# block for static analysis / IDE only — it doesn't drive the build.)
cg.add_define("USE_BK72XX_CRASH_HANDLER")
for sym in ("bk_trap_udef", "bk_trap_pabt", "bk_trap_dabt"):
cg.add_build_flag(f"-Wl,--wrap={sym}")
cg.add_platformio_option("extra_scripts", ["pre:patch_bk72xx_noinit.py"])
# dummy version code
cg.add_define("USE_ARDUINO_VERSION_CODE", cg.RawExpression("VERSION_CODE(0, 0, 0)"))
# decrease web server stack size (16k words -> 4k words)
@@ -568,8 +582,11 @@ async def component_to_code(config):
# Called by writer.py
def copy_files() -> None:
script_dir = Path(__file__).parent
patch_linker_file = script_dir / "patch_linker.py.script"
copy_file_if_changed(
patch_linker_file,
CORE.relative_build_path("patch_linker.py"),
)
for src_name, dst_name in (
("patch_linker.py.script", "patch_linker.py"),
("patch_bk72xx_noinit.py.script", "patch_bk72xx_noinit.py"),
):
copy_file_if_changed(
script_dir / src_name,
CORE.relative_build_path(dst_name),
)

View File

@@ -4,6 +4,10 @@
#include "esphome/core/hal.h"
#include "preferences.h"
#ifdef USE_BK72XX_CRASH_HANDLER
#include "esphome/components/bk72xx/crash_handler.h"
#endif
#include <FreeRTOS.h>
#include <task.h>
@@ -19,6 +23,11 @@ namespace esphome {
// inlined in core/hal/hal_libretiny.h.
void arch_init() {
#ifdef USE_BK72XX_CRASH_HANDLER
// Snapshot any crash data left in .noinit by the previous boot before the
// watchdog or anything else can perturb it.
bk72xx::crash_handler_read_and_clear();
#endif
libretiny::setup_preferences();
lt_wdt_enable(10000L);
#ifdef USE_BK72XX

View File

@@ -0,0 +1,83 @@
# pylint: disable=E0602
Import("env") # noqa
import os
import re
# BK72XX crash handler needs a region of RAM that is not zero-initialized
# by the C runtime startup (boot_handlers_bk7231u.S _sysboot_zi_init zeros
# from _bss_start to _bss_end). The stock linker script has no .noinit
# section, so we inject one between .bss and _empty_ram in the generated
# linker script: that places the section after _bss_end (skipping the
# zero-init range) and before the heap start, so the heap shrinks to fit
# rather than overlapping our crash record.
_MARKER = "/* esphome .noinit */"
# Insert a NOLOAD .noinit section right after the .bss block. NOLOAD prevents
# the linker from emitting any LMA copy data for it. We also emit PROVIDE
# symbols pinning RAM/flash bounds to the linker's own MEMORY definition;
# the crash handler reads these instead of hardcoding bounds, so it tracks
# the actual variant + board layout (BK7231N=192KB vs others=256KB,
# board-specific BKOFFSET_APP/BKRBL_SIZE_APP) without separate constants.
_NOINIT_BLOCK = (
"\n"
"\t.noinit ALIGN(8) (NOLOAD) :\n"
"\t{\n"
"\t\t_noinit_start = .;\n"
"\t\t*(.noinit*)\n"
"\t\t. = ALIGN(8);\n"
"\t\t_noinit_end = .;\n"
"\t} > ram\n"
"\n"
"\tPROVIDE(_esphome_ram_start = ORIGIN(ram));\n"
"\tPROVIDE(_esphome_ram_end = ORIGIN(ram) + LENGTH(ram));\n"
"\tPROVIDE(_esphome_flash_start = ORIGIN(flash));\n"
"\tPROVIDE(_esphome_flash_end = ORIGIN(flash) + LENGTH(flash));\n"
"\n"
+ "\t" + _MARKER + "\n"
)
# Match the closing brace + memory region of the .bss section, capturing
# everything up through "} > ram" so we can append our block after it.
_BSS_END = re.compile(
r"(\.bss\s+ALIGN\(8\)\s*:\s*\{[^}]*\}\s*>\s*ram\b[^\n]*\n)",
re.DOTALL,
)
def _patch(content):
if _MARKER in content:
return content
return _BSS_END.sub(r"\1" + _NOINIT_BLOCK, content, count=1)
def _pre_link(target, source, env):
build_dir = env.subst("$BUILD_DIR")
if not os.path.isdir(build_dir):
return
patched = 0
for name in os.listdir(build_dir):
if not name.endswith(".ld"):
continue
path = os.path.join(build_dir, name)
with open(path, "r", encoding="utf-8") as fh:
original = fh.read()
if _MARKER in original:
patched += 1
continue
content = _patch(original)
if content != original:
with open(path, "w", encoding="utf-8") as fh:
fh.write(content)
print("ESPHome: injected .noinit into {} for crash handler".format(name))
patched += 1
if not patched:
raise RuntimeError(
"ESPHome: could not inject .noinit into any BK72XX linker script. "
"The crash handler relies on a non-zeroed RAM region; without it, "
"previous-boot crash data would be wiped at startup. Update "
"patch_bk72xx_noinit.py.script — the .bss matcher likely no longer "
"matches the current LibreTiny linker template."
)
env.AddPreAction("$BUILD_DIR/${PROGNAME}.elf", _pre_link)

View File

@@ -1,5 +1,9 @@
#ifdef USE_LIBRETINY
#include "logger.h"
#include "esphome/core/defines.h"
#ifdef USE_BK72XX_CRASH_HANDLER
#include "esphome/components/bk72xx/crash_handler.h"
#endif
namespace esphome::logger {
@@ -47,6 +51,9 @@ void Logger::pre_setup() {
global_logger = this;
ESP_LOGI(TAG, "Log initialized");
#ifdef USE_BK72XX_CRASH_HANDLER
bk72xx::crash_handler_log();
#endif
}
const LogString *Logger::get_uart_selection_() {

View File

@@ -420,6 +420,10 @@
#define ESPHOME_TASK_LOG_BUFFER_SIZE 768
#endif
#ifdef USE_BK72XX
#define USE_BK72XX_CRASH_HANDLER
#endif
#ifdef USE_HOST
#define USE_HTTP_REQUEST_RESPONSE
#define USE_SOCKET_IMPL_BSD_SOCKETS