mirror of https://github.com/commaai/openpilot.git
Add watchdog check to manager (#20277)
* this should work but doesn't
* Only offroad
* works
* make it work offroad
* reduce diff
* cleanup
* need util
Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
old-commit-hash: a94ba4fb8b
This commit is contained in:
parent
e80ea20b34
commit
4d6c98aa6b
|
@ -196,6 +196,8 @@ selfdrive/common/clutil.cc
|
||||||
selfdrive/common/clutil.h
|
selfdrive/common/clutil.h
|
||||||
selfdrive/common/params.h
|
selfdrive/common/params.h
|
||||||
selfdrive/common/params.cc
|
selfdrive/common/params.cc
|
||||||
|
selfdrive/common/watchdog.cc
|
||||||
|
selfdrive/common/watchdog.h
|
||||||
|
|
||||||
selfdrive/common/modeldata.h
|
selfdrive/common/modeldata.h
|
||||||
selfdrive/common/mat.h
|
selfdrive/common/mat.h
|
||||||
|
|
|
@ -5,7 +5,14 @@ if SHARED:
|
||||||
else:
|
else:
|
||||||
fxn = env.Library
|
fxn = env.Library
|
||||||
|
|
||||||
common_libs = ['params.cc', 'swaglog.cc', 'util.cc', 'gpio.cc', 'i2c.cc']
|
common_libs = [
|
||||||
|
'params.cc',
|
||||||
|
'swaglog.cc',
|
||||||
|
'util.cc',
|
||||||
|
'gpio.cc',
|
||||||
|
'i2c.cc',
|
||||||
|
'watchdog.cc',
|
||||||
|
]
|
||||||
|
|
||||||
_common = fxn('common', common_libs, LIBS="json11")
|
_common = fxn('common', common_libs, LIBS="json11")
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,7 @@
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
|
#include "common/util.h"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/prctl.h>
|
#include <sys/prctl.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
@ -45,8 +41,8 @@ void* read_file(const char* path, size_t* out_len) {
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
int write_file(const char* path, const void* data, size_t size) {
|
int write_file(const char* path, const void* data, size_t size, int flags, mode_t mode) {
|
||||||
int fd = open(path, O_WRONLY);
|
int fd = open(path, flags, mode);
|
||||||
if (fd == -1) {
|
if (fd == -1) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <cstdio>
|
||||||
#include <unistd.h>
|
|
||||||
#include <csignal>
|
#include <csignal>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cstdlib>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -10,6 +11,9 @@
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <cassert>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
#ifndef sighandler_t
|
#ifndef sighandler_t
|
||||||
typedef void (*sighandler_t)(int sig);
|
typedef void (*sighandler_t)(int sig);
|
||||||
|
@ -25,7 +29,7 @@ typedef void (*sighandler_t)(int sig);
|
||||||
// Returns NULL on failure, otherwise the NULL-terminated file contents.
|
// Returns NULL on failure, otherwise the NULL-terminated file contents.
|
||||||
// The result must be freed by the caller.
|
// The result must be freed by the caller.
|
||||||
void* read_file(const char* path, size_t* out_len);
|
void* read_file(const char* path, size_t* out_len);
|
||||||
int write_file(const char* path, const void* data, size_t size);
|
int write_file(const char* path, const void* data, size_t size, int flags=O_WRONLY, mode_t mode=0777);
|
||||||
|
|
||||||
void set_thread_name(const char* name);
|
void set_thread_name(const char* name);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
#include <string>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "common/timing.h"
|
||||||
|
#include "common/util.h"
|
||||||
|
#include "common/watchdog.h"
|
||||||
|
|
||||||
|
const std::string watchdog_fn_prefix = "/dev/shm/wd_"; // + <pid>
|
||||||
|
|
||||||
|
bool watchdog_kick(){
|
||||||
|
std::string fn = watchdog_fn_prefix + std::to_string(getpid());
|
||||||
|
std::string cur_t = std::to_string(nanos_since_boot());
|
||||||
|
|
||||||
|
int r = write_file(fn.c_str(), cur_t.data(), cur_t.length(), O_WRONLY | O_CREAT);
|
||||||
|
return r == 0;
|
||||||
|
}
|
|
@ -0,0 +1,3 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
bool watchdog_kick();
|
|
@ -12,10 +12,14 @@ import cereal.messaging as messaging
|
||||||
import selfdrive.crash as crash
|
import selfdrive.crash as crash
|
||||||
from common.basedir import BASEDIR
|
from common.basedir import BASEDIR
|
||||||
from common.params import Params
|
from common.params import Params
|
||||||
|
from common.realtime import sec_since_boot
|
||||||
from selfdrive.swaglog import cloudlog
|
from selfdrive.swaglog import cloudlog
|
||||||
from selfdrive.hardware import HARDWARE
|
from selfdrive.hardware import HARDWARE
|
||||||
from cereal import log
|
from cereal import log
|
||||||
|
|
||||||
|
WATCHDOG_FN = "/dev/shm/wd_"
|
||||||
|
ENABLE_WATCHDOG = os.getenv("NO_WATCHDOG") is None
|
||||||
|
|
||||||
|
|
||||||
def launcher(proc):
|
def launcher(proc):
|
||||||
try:
|
try:
|
||||||
|
@ -61,6 +65,10 @@ class ManagerProcess(ABC):
|
||||||
enabled = True
|
enabled = True
|
||||||
name = ""
|
name = ""
|
||||||
|
|
||||||
|
last_watchdog_time = 0
|
||||||
|
watchdog_max_dt = None
|
||||||
|
watchdog_seen = False
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def prepare(self):
|
def prepare(self):
|
||||||
pass
|
pass
|
||||||
|
@ -69,6 +77,30 @@ class ManagerProcess(ABC):
|
||||||
def start(self):
|
def start(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def restart(self):
|
||||||
|
self.stop()
|
||||||
|
self.start()
|
||||||
|
|
||||||
|
def check_watchdog(self, started):
|
||||||
|
if self.watchdog_max_dt is None or self.proc is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
fn = WATCHDOG_FN + str(self.proc.pid)
|
||||||
|
self.last_watchdog_time = int(open(fn).read())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
dt = sec_since_boot() - self.last_watchdog_time / 1e9
|
||||||
|
|
||||||
|
if dt > self.watchdog_max_dt:
|
||||||
|
# Only restart while offroad for now
|
||||||
|
if self.watchdog_seen and ENABLE_WATCHDOG and (not started):
|
||||||
|
cloudlog.error(f"Watchdog timeout for {self.name}, restarting")
|
||||||
|
self.restart()
|
||||||
|
else:
|
||||||
|
self.watchdog_seen = True
|
||||||
|
|
||||||
def stop(self, retry=True):
|
def stop(self, retry=True):
|
||||||
if self.proc is None:
|
if self.proc is None:
|
||||||
return
|
return
|
||||||
|
@ -128,7 +160,7 @@ class ManagerProcess(ABC):
|
||||||
|
|
||||||
|
|
||||||
class NativeProcess(ManagerProcess):
|
class NativeProcess(ManagerProcess):
|
||||||
def __init__(self, name, cwd, cmdline, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False):
|
def __init__(self, name, cwd, cmdline, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False, watchdog_max_dt=None):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.cwd = cwd
|
self.cwd = cwd
|
||||||
self.cmdline = cmdline
|
self.cmdline = cmdline
|
||||||
|
@ -137,6 +169,7 @@ class NativeProcess(ManagerProcess):
|
||||||
self.driverview = driverview
|
self.driverview = driverview
|
||||||
self.unkillable = unkillable
|
self.unkillable = unkillable
|
||||||
self.sigkill = sigkill
|
self.sigkill = sigkill
|
||||||
|
self.watchdog_max_dt = watchdog_max_dt
|
||||||
|
|
||||||
def prepare(self):
|
def prepare(self):
|
||||||
pass
|
pass
|
||||||
|
@ -149,10 +182,11 @@ class NativeProcess(ManagerProcess):
|
||||||
cloudlog.info("starting process %s" % self.name)
|
cloudlog.info("starting process %s" % self.name)
|
||||||
self.proc = Process(name=self.name, target=nativelauncher, args=(self.cmdline, cwd))
|
self.proc = Process(name=self.name, target=nativelauncher, args=(self.cmdline, cwd))
|
||||||
self.proc.start()
|
self.proc.start()
|
||||||
|
self.watchdog_seen = False
|
||||||
|
|
||||||
|
|
||||||
class PythonProcess(ManagerProcess):
|
class PythonProcess(ManagerProcess):
|
||||||
def __init__(self, name, module, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False):
|
def __init__(self, name, module, enabled=True, persistent=False, driverview=False, unkillable=False, sigkill=False, watchdog_max_dt=None):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.module = module
|
self.module = module
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
|
@ -160,6 +194,7 @@ class PythonProcess(ManagerProcess):
|
||||||
self.driverview = driverview
|
self.driverview = driverview
|
||||||
self.unkillable = unkillable
|
self.unkillable = unkillable
|
||||||
self.sigkill = sigkill
|
self.sigkill = sigkill
|
||||||
|
self.watchdog_max_dt = watchdog_max_dt
|
||||||
|
|
||||||
def prepare(self):
|
def prepare(self):
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
|
@ -173,6 +208,7 @@ class PythonProcess(ManagerProcess):
|
||||||
cloudlog.info("starting python %s" % self.module)
|
cloudlog.info("starting python %s" % self.module)
|
||||||
self.proc = Process(name=self.name, target=launcher, args=(self.module,))
|
self.proc = Process(name=self.name, target=launcher, args=(self.module,))
|
||||||
self.proc.start()
|
self.proc.start()
|
||||||
|
self.watchdog_seen = False
|
||||||
|
|
||||||
|
|
||||||
class DaemonProcess(ManagerProcess):
|
class DaemonProcess(ManagerProcess):
|
||||||
|
@ -234,3 +270,6 @@ def ensure_running(procs, started, driverview=False, not_run=None):
|
||||||
p.start()
|
p.start()
|
||||||
else:
|
else:
|
||||||
p.stop()
|
p.stop()
|
||||||
|
|
||||||
|
p.check_watchdog(started)
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@ procs = [
|
||||||
NativeProcess("proclogd", "selfdrive/proclogd", ["./proclogd"]),
|
NativeProcess("proclogd", "selfdrive/proclogd", ["./proclogd"]),
|
||||||
NativeProcess("sensord", "selfdrive/sensord", ["./sensord"], enabled=not PC, persistent=EON, sigkill=EON),
|
NativeProcess("sensord", "selfdrive/sensord", ["./sensord"], enabled=not PC, persistent=EON, sigkill=EON),
|
||||||
NativeProcess("ubloxd", "selfdrive/locationd", ["./ubloxd"], enabled=(not PC or WEBCAM)),
|
NativeProcess("ubloxd", "selfdrive/locationd", ["./ubloxd"], enabled=(not PC or WEBCAM)),
|
||||||
NativeProcess("ui", "selfdrive/ui", ["./ui"], persistent=True),
|
NativeProcess("ui", "selfdrive/ui", ["./ui"], persistent=True, watchdog_max_dt=10),
|
||||||
PythonProcess("calibrationd", "selfdrive.locationd.calibrationd"),
|
PythonProcess("calibrationd", "selfdrive.locationd.calibrationd"),
|
||||||
PythonProcess("controlsd", "selfdrive.controls.controlsd"),
|
PythonProcess("controlsd", "selfdrive.controls.controlsd"),
|
||||||
PythonProcess("deleter", "selfdrive.loggerd.deleter", persistent=True),
|
PythonProcess("deleter", "selfdrive.loggerd.deleter", persistent=True),
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/params.h"
|
#include "common/params.h"
|
||||||
#include "common/touch.h"
|
#include "common/touch.h"
|
||||||
#include "common/swaglog.h"
|
#include "common/swaglog.h"
|
||||||
|
#include "common/watchdog.h"
|
||||||
|
|
||||||
#include "ui.hpp"
|
#include "ui.hpp"
|
||||||
#include "paint.hpp"
|
#include "paint.hpp"
|
||||||
|
@ -139,6 +140,7 @@ int main(int argc, char* argv[]) {
|
||||||
s->sound->setVolume(MIN_VOLUME);
|
s->sound->setVolume(MIN_VOLUME);
|
||||||
|
|
||||||
while (!do_exit) {
|
while (!do_exit) {
|
||||||
|
watchdog_kick();
|
||||||
if (!s->scene.started) {
|
if (!s->scene.started) {
|
||||||
util::sleep_for(50);
|
util::sleep_for(50);
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "common/params.h"
|
#include "common/params.h"
|
||||||
#include "common/timing.h"
|
#include "common/timing.h"
|
||||||
#include "common/swaglog.h"
|
#include "common/swaglog.h"
|
||||||
|
#include "common/watchdog.h"
|
||||||
|
|
||||||
#include "home.hpp"
|
#include "home.hpp"
|
||||||
#include "paint.hpp"
|
#include "paint.hpp"
|
||||||
|
@ -276,6 +277,7 @@ void GLWindow::timerUpdate() {
|
||||||
|
|
||||||
ui_update(&ui_state);
|
ui_update(&ui_state);
|
||||||
repaint();
|
repaint();
|
||||||
|
watchdog_kick();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GLWindow::resizeGL(int w, int h) {
|
void GLWindow::resizeGL(int w, int h) {
|
||||||
|
|
Loading…
Reference in New Issue