mirror of https://github.com/commaai/openpilot.git
Add watchdog check to manager (#20277)
* this should work but doesn't
* Only offroad
* works
* make it work offroad
* reduce diff
* cleanup
* need util
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
old-commit-hash: a94ba4fb8b
This commit is contained in:
parent
e80ea20b34
commit
4d6c98aa6b
|
@ -196,6 +196,8 @@ selfdrive/common/clutil.cc
|
|||
selfdrive/common/clutil.h
|
||||
selfdrive/common/params.h
|
||||
selfdrive/common/params.cc
|
||||
selfdrive/common/watchdog.cc
|
||||
selfdrive/common/watchdog.h
|
||||
|
||||
selfdrive/common/modeldata.h
|
||||
selfdrive/common/mat.h
|
||||
|
|
|
@ -5,7 +5,14 @@ if SHARED:
|
|||
else:
|
||||
fxn = env.Library
|
||||
|
||||
common_libs = ['params.cc', 'swaglog.cc', 'util.cc', 'gpio.cc', 'i2c.cc']
|
||||
common_libs = [
|
||||
'params.cc',
|
||||
'swaglog.cc',
|
||||
'util.cc',
|
||||
'gpio.cc',
|
||||
'i2c.cc',
|
||||
'watchdog.cc',
|
||||
]
|
||||
|
||||
_common = fxn('common', common_libs, LIBS="json11")
|
||||
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "common/util.h"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/syscall.h>
|
||||
|
@ -45,8 +41,8 @@ void* read_file(const char* path, size_t* out_len) {
|
|||
return buf;
|
||||
}
|
||||
|
||||
int write_file(const char* path, const void* data, size_t size) {
|
||||
int fd = open(path, O_WRONLY);
|
||||
int write_file(const char* path, const void* data, size_t size, int flags, mode_t mode) {
|
||||
int fd = open(path, flags, mode);
|
||||
if (fd == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <cstdio>
|
||||
#include <csignal>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
|
@ -10,6 +11,9 @@
|
|||
#include <fstream>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <cassert>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#ifndef sighandler_t
|
||||
typedef void (*sighandler_t)(int sig);
|
||||
|
@ -25,7 +29,7 @@ typedef void (*sighandler_t)(int sig);
|
|||
// Returns NULL on failure, otherwise the NULL-terminated file contents.
|
||||
// The result must be freed by the caller.
|
||||
void* read_file(const char* path, size_t* out_len);
|
||||
int write_file(const char* path, const void* data, size_t size);
|
||||
int write_file(const char* path, const void* data, size_t size, int flags=O_WRONLY, mode_t mode=0777);
|
||||
|
||||
void set_thread_name(const char* name);
|
||||
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common/timing.h"
|
||||
#include "common/util.h"
|
||||
#include "common/watchdog.h"
|
||||
|
||||
const std::string watchdog_fn_prefix = "/dev/shm/wd_"; // + <pid>
|
||||
|
||||
bool watchdog_kick(){
|
||||
std::string fn = watchdog_fn_prefix + std::to_string(getpid());
|
||||
std::string cur_t = std::to_string(nanos_since_boot());
|
||||
|
||||
int r = write_file(fn.c_str(), cur_t.data(), cur_t.length(), O_WRONLY | O_CREAT);
|
||||
return r == 0;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
#pragma once
|
||||
|
||||
bool watchdog_kick();
|
|
@ -12,10 +12,14 @@ import cereal.messaging as messaging
|
|||
import selfdrive.crash as crash
|
||||
from common.basedir import BASEDIR
|
||||
from common.params import Params
|
||||
from common.realtime import sec_since_boot
|
||||
from selfdrive.swaglog import cloudlog
|
||||
from selfdrive.hardware import HARDWARE
|
||||
from cereal import log
|
||||
|
||||
WATCHDOG_FN = "/dev/shm/wd_"
|
||||
ENABLE_WATCHDOG = os.getenv("NO_WATCHDOG") is None
|
||||
|
||||
|
||||
def launcher(proc):
|
||||
try:
|
||||
|
@ -61,6 +65,10 @@ class ManagerProcess(ABC):
|
|||
enabled = True
|
||||
name = ""
|
||||
|
||||
last_watchdog_time = 0
|
||||
watchdog_max_dt = None
|
||||
watchdog_seen = False
|
||||
|
||||
@abstractmethod
|
||||
def prepare(self):
|
||||
pass
|
||||
|
@ -69,6 +77,30 @@ class ManagerProcess(ABC):
|
|||
def start(self):
|
||||
pass
|
||||
|
||||
def restart(self):
  """Stop the process, then start it again."""
  self.stop()
  self.start()
|
||||
|
||||
def check_watchdog(self, started):
  """Restart the process if its watchdog file has gone stale.

  Does nothing when no `watchdog_max_dt` is configured or when there is no
  process object. Otherwise reads the timestamp (nanoseconds, as decimal
  text) from WATCHDOG_FN + <pid> and compares it against sec_since_boot().

  Args:
    started: truthy while onroad; restarts are only performed when falsy.
  """
  if self.watchdog_max_dt is None or self.proc is None:
    return

  try:
    fn = WATCHDOG_FN + str(self.proc.pid)
    # Close the handle deterministically; this runs on every manager loop.
    with open(fn) as f:
      self.last_watchdog_time = int(f.read())
  except Exception:
    # Best effort: the file may not exist yet or may hold unparsable text.
    # Keep the previous timestamp in that case.
    pass

  dt = sec_since_boot() - self.last_watchdog_time / 1e9

  if dt > self.watchdog_max_dt:
    # Only restart while offroad for now, and only once a fresh timestamp
    # has been observed for this process (watchdog_seen).
    if self.watchdog_seen and ENABLE_WATCHDOG and (not started):
      cloudlog.error(f"Watchdog timeout for {self.name}, restarting")
      self.restart()
  else:
    self.watchdog_seen = True
|
||||
|
||||
def stop(self, retry=True):
|
||||
if self.proc is None:
|
||||
return
|
||||
|
@ -128,7 +160,7 @@ class ManagerProcess(ABC):
|
|||
|
||||
|
||||
class NativeProcess(ManagerProcess):
|
||||
def __init__(self, name, cwd, cmdline, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False):
|
||||
def __init__(self, name, cwd, cmdline, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False, watchdog_max_dt=None):
|
||||
self.name = name
|
||||
self.cwd = cwd
|
||||
self.cmdline = cmdline
|
||||
|
@ -137,6 +169,7 @@ class NativeProcess(ManagerProcess):
|
|||
self.driverview = driverview
|
||||
self.unkillable = unkillable
|
||||
self.sigkill = sigkill
|
||||
self.watchdog_max_dt = watchdog_max_dt
|
||||
|
||||
def prepare(self):
|
||||
pass
|
||||
|
@ -149,10 +182,11 @@ class NativeProcess(ManagerProcess):
|
|||
cloudlog.info("starting process %s" % self.name)
|
||||
self.proc = Process(name=self.name, target=nativelauncher, args=(self.cmdline, cwd))
|
||||
self.proc.start()
|
||||
self.watchdog_seen = False
|
||||
|
||||
|
||||
class PythonProcess(ManagerProcess):
|
||||
def __init__(self, name, module, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False):
|
||||
def __init__(self, name, module, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False, watchdog_max_dt=None):
|
||||
self.name = name
|
||||
self.module = module
|
||||
self.enabled = enabled
|
||||
|
@ -160,6 +194,7 @@ class PythonProcess(ManagerProcess):
|
|||
self.driverview = driverview
|
||||
self.unkillable = unkillable
|
||||
self.sigkill = sigkill
|
||||
self.watchdog_max_dt = watchdog_max_dt
|
||||
|
||||
def prepare(self):
|
||||
if self.enabled:
|
||||
|
@ -173,6 +208,7 @@ class PythonProcess(ManagerProcess):
|
|||
cloudlog.info("starting python %s" % self.module)
|
||||
self.proc = Process(name=self.name, target=launcher, args=(self.module,))
|
||||
self.proc.start()
|
||||
self.watchdog_seen = False
|
||||
|
||||
|
||||
class DaemonProcess(ManagerProcess):
|
||||
|
@ -234,3 +270,6 @@ def ensure_running(procs, started, driverview=False, not_run=None):
|
|||
p.start()
|
||||
else:
|
||||
p.stop()
|
||||
|
||||
p.check_watchdog(started)
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ procs = [
|
|||
NativeProcess("proclogd", "selfdrive/proclogd", ["./proclogd"]),
|
||||
NativeProcess("sensord", "selfdrive/sensord", ["./sensord"], enabled=not PC, persistent=EON, sigkill=EON),
|
||||
NativeProcess("ubloxd", "selfdrive/locationd", ["./ubloxd"], enabled=(not PC or WEBCAM)),
|
||||
NativeProcess("ui", "selfdrive/ui", ["./ui"], persistent=True),
|
||||
NativeProcess("ui", "selfdrive/ui", ["./ui"], persistent=True, watchdog_max_dt=10),
|
||||
PythonProcess("calibrationd", "selfdrive.locationd.calibrationd"),
|
||||
PythonProcess("controlsd", "selfdrive.controls.controlsd"),
|
||||
PythonProcess("deleter", "selfdrive.loggerd.deleter", persistent=True),
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "common/params.h"
|
||||
#include "common/touch.h"
|
||||
#include "common/swaglog.h"
|
||||
#include "common/watchdog.h"
|
||||
|
||||
#include "ui.hpp"
|
||||
#include "paint.hpp"
|
||||
|
@ -139,6 +140,7 @@ int main(int argc, char* argv[]) {
|
|||
s->sound->setVolume(MIN_VOLUME);
|
||||
|
||||
while (!do_exit) {
|
||||
watchdog_kick();
|
||||
if (!s->scene.started) {
|
||||
util::sleep_for(50);
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "common/params.h"
|
||||
#include "common/timing.h"
|
||||
#include "common/swaglog.h"
|
||||
#include "common/watchdog.h"
|
||||
|
||||
#include "home.hpp"
|
||||
#include "paint.hpp"
|
||||
|
@ -276,6 +277,7 @@ void GLWindow::timerUpdate() {
|
|||
|
||||
ui_update(&ui_state);
|
||||
repaint();
|
||||
watchdog_kick();
|
||||
}
|
||||
|
||||
void GLWindow::resizeGL(int w, int h) {
|
||||
|
|
Loading…
Reference in New Issue