SOM boot recovery (#1712)

* reset

* simpler

* only once per panda boot

* cleanup

* also check serial
This commit is contained in:
Adeeb Shihadeh
2023-11-09 18:01:46 -08:00
committed by GitHub
parent d2ea9ad293
commit a1d699b87d
7 changed files with 84 additions and 5 deletions

View File

@@ -2,6 +2,7 @@
typedef enum {
BOOT_STANDBY,
BOOT_BOOTKICK,
BOOT_RESET,
} BootState;
typedef void (*board_init)(void);

View File

@@ -16,7 +16,7 @@ void tres_set_ir_power(uint8_t percentage){
void tres_set_bootkick(BootState state) {
set_gpio_output(GPIOA, 0, state != BOOT_BOOTKICK);
set_gpio_output(GPIOC, 12, 1);
set_gpio_output(GPIOC, 12, state != BOOT_RESET);
}
void tres_set_fan_enabled(bool enabled) {

View File

@@ -2,7 +2,13 @@ bool bootkick_ign_prev = false;
BootState boot_state = BOOT_BOOTKICK;
uint8_t bootkick_harness_status_prev = HARNESS_STATUS_NC;
uint8_t boot_reset_countdown = 0;
uint8_t waiting_to_boot_countdown = 0;
bool bootkick_reset_triggered = false;
uint16_t bootkick_last_serial_ptr = 0;
void bootkick_tick(bool ignition, bool recent_heartbeat) {
BootState boot_state_prev = boot_state;
const bool harness_inserted = (harness.status != bootkick_harness_status_prev) && (harness.status != HARNESS_STATUS_NC);
if ((ignition && !bootkick_ign_prev) || harness_inserted) {
@@ -15,8 +21,47 @@ void bootkick_tick(bool ignition, bool recent_heartbeat) {
}
/*
Ensure SOM boots in case it goes into QDL mode. Reset behavior:
* shouldn't trigger on the first boot after power-on
* only try reset once per bootkick, i.e. don't keep trying until booted
* only try once per panda boot, since openpilot will reset panda on startup
* once BOOT_RESET is triggered, it stays until countdown is finished
*/
if (!bootkick_reset_triggered && (boot_state == BOOT_BOOTKICK) && (boot_state_prev == BOOT_STANDBY)) {
waiting_to_boot_countdown = 45U;
}
if (waiting_to_boot_countdown > 0U) {
bool serial_activity = uart_ring_som_debug.w_ptr_tx != bootkick_last_serial_ptr;
if (serial_activity || current_board->read_som_gpio() || (boot_state != BOOT_BOOTKICK)) {
waiting_to_boot_countdown = 0U;
} else {
// try a reset
if (waiting_to_boot_countdown == 1U) {
boot_reset_countdown = 5U;
}
}
}
// handle reset state
if (boot_reset_countdown > 0U) {
boot_state = BOOT_RESET;
bootkick_reset_triggered = true;
} else {
if (boot_state == BOOT_RESET) {
boot_state = BOOT_BOOTKICK;
}
}
// update state
bootkick_ign_prev = ignition;
bootkick_harness_status_prev = harness.status;
bootkick_last_serial_ptr = uart_ring_som_debug.w_ptr_tx;
if (waiting_to_boot_countdown > 0U) {
waiting_to_boot_countdown--;
}
if (boot_reset_countdown > 0U) {
boot_reset_countdown--;
}
current_board->set_bootkick(boot_state);
}

View File

@@ -29,6 +29,7 @@ struct __attribute__((packed)) health_t {
uint8_t fan_stall_count;
uint16_t sbu1_voltage_mV;
uint16_t sbu2_voltage_mV;
uint8_t som_reset_triggered;
};
#define CAN_HEALTH_PACKET_VERSION 5

View File

@@ -44,6 +44,8 @@ int get_health_pkt(void *dat) {
health->sbu1_voltage_mV = harness.sbu1_voltage_mV;
health->sbu2_voltage_mV = harness.sbu2_voltage_mV;
health->som_reset_triggered = bootkick_reset_triggered;
return sizeof(*health);
}

View File

@@ -169,7 +169,7 @@ class Panda:
CAN_PACKET_VERSION = 4
HEALTH_PACKET_VERSION = 14
CAN_HEALTH_PACKET_VERSION = 5
HEALTH_STRUCT = struct.Struct("<IIIIIIIIIBBBBBBHBBBHfBBHBHH")
HEALTH_STRUCT = struct.Struct("<IIIIIIIIIBBBBBBHBBBHfBBHBHHB")
CAN_HEALTH_STRUCT = struct.Struct("<BIBBBBBBBBIIIIIIIHHBBBIIII")
F2_DEVICES = [HW_TYPE_PEDAL, ]
@@ -606,6 +606,7 @@ class Panda:
"fan_stall_count": a[24],
"sbu1_voltage_mV": a[25],
"sbu2_voltage_mV": a[26],
"som_reset_triggered": a[27],
}
@ensure_can_health_packet_version

View File

@@ -7,7 +7,6 @@ from panda import Panda, PandaJungle
PANDA_SERIAL = "28002d000451323431333839"
JUNGLE_SERIAL = "26001c001451313236343430"
AUX_PORT = 6
OBDC_PORT = 1
@pytest.fixture(autouse=True, scope="function")
@@ -15,6 +14,7 @@ def pj():
jungle = PandaJungle(JUNGLE_SERIAL)
jungle.flash()
jungle.reset()
jungle.set_ignition(False)
yield jungle
@@ -42,7 +42,6 @@ def setup_state(panda, jungle, state):
elif state == "normal boot":
jungle.set_panda_individual_power(OBDC_PORT, 1)
elif state == "QDL":
jungle.set_panda_individual_power(AUX_PORT, 1)
time.sleep(0.5)
jungle.set_panda_individual_power(OBDC_PORT, 1)
elif state == "ready to bootkick":
@@ -50,6 +49,7 @@ def setup_state(panda, jungle, state):
jungle.set_panda_individual_power(OBDC_PORT, 1)
wait_for_boot(panda, jungle)
set_som_shutdown_flag(panda)
panda.set_safety_mode(Panda.SAFETY_SILENT)
panda.send_heartbeat()
wait_for_som_shutdown(panda, jungle)
else:
@@ -62,7 +62,11 @@ def wait_for_som_shutdown(panda, jungle):
# can take a while for the SOM to fully shutdown
if time.monotonic() - st > 120:
raise Exception("SOM didn't shutdown in time")
if check_som_boot_flag(panda):
raise Exception(f"SOM rebooted instead of shutdown: {time.monotonic() - st}s")
time.sleep(0.5)
dt = time.monotonic() - st
print("waiting for shutdown", round(dt))
dt = time.monotonic() - st
print(f"took {dt:.2f}s for SOM to shutdown")
@@ -84,7 +88,7 @@ def check_som_boot_flag(panda):
def set_som_shutdown_flag(panda):
panda.set_datetime(datetime.datetime(year=2040, month=8, day=23))
def wait_for_boot(panda, jungle, bootkick=False, timeout=120):
def wait_for_boot(panda, jungle, reset_expected=False, bootkick=False, timeout=120):
st = time.monotonic()
Panda.wait_for_panda(PANDA_SERIAL, timeout)
@@ -104,6 +108,7 @@ def wait_for_boot(panda, jungle, bootkick=False, timeout=120):
raise Exception("SOM didn't boot in time")
time.sleep(1.0)
assert panda.health()['som_reset_triggered'] == reset_expected
def test_cold_boot(p, pj):
setup_state(p, pj, "off")
@@ -123,3 +128,27 @@ def test_bootkick_can_ignition(p, pj):
pj.can_send(0x9E, b'\xc0\x00\x00\x00\x00\x00\x00\x00', 0)
time.sleep(0.5)
wait_for_boot(p, pj, bootkick=True)
def test_recovery_from_qdl(p, pj):
setup_state(p, pj, "ready to bootkick")
# put into QDL using the FORCE_USB_BOOT pin
for i in range(10):
pj.set_header_pin(i, 1)
# try to boot
time.sleep(1)
pj.set_ignition(True)
time.sleep(3)
# release FORCE_USB_BOOT
for i in range(10):
pj.set_header_pin(i, 0)
# normally, this GPIO is set immediately since it's first enabled in the ABL
for i in range(40):
assert not p.read_som_gpio()
time.sleep(1)
# should boot after 45s
wait_for_boot(p, pj, reset_expected=True, bootkick=True, timeout=120)