Refactor CPU usage test (#1802)
* pull CPU usage test out of manager
* remove that
* add to release files
* cleanup
* executable
* this should work
* check if car started procs are running
* debug
* add min cpu usage
* remove debug prints
* adjust min cpu threshold
Co-authored-by: Comma Device <device@comma.ai>
old-commit-hash: 6db7fa8c03
This commit is contained in:
parent
fa06882a72
commit
f74865bedf
|
@ -1,50 +1 @@
|
|||
def cputime_total(ct):
    """Return the combined CPU time of one process entry.

    Adds the ``cpuUser``, ``cpuSystem``, ``cpuChildrenUser`` and
    ``cpuChildrenSystem`` fields of ``ct``.
    """
    own_time = ct.cpuUser + ct.cpuSystem
    return own_time + ct.cpuChildrenUser + ct.cpuChildrenSystem
|
||||
|
||||
|
||||
def print_cpu_usage(first_proc, last_proc):
    """Print per-process CPU usage between two samples and flag outliers.

    For every entry of the reference table the matching process is looked
    up in both samples (by membership of the name in ``cmdline``), and its
    CPU usage in percent over the elapsed time is printed.

    Args:
        first_proc: earlier sample; must expose ``logMonoTime`` and
            ``procLog.procs``.
        last_proc: later sample with the same attributes.

    Returns:
        0 when every listed process was found in both samples and none
        exceeded its allowed usage, otherwise 1.
    """
    # (cmdline entry to look for, reference CPU usage in percent)
    expected_usage = (
        ("selfdrive.controls.controlsd", 59.46),
        ("./_modeld", 12.74),
        ("./loggerd", 28.49),
        ("selfdrive.controls.plannerd", 19.77),
        ("selfdrive.controls.radard", 9.54),
        ("./_ui", 9.54),
        ("./camerad", 7.07),
        ("selfdrive.locationd.locationd", 34.38),
        ("selfdrive.locationd.paramsd", 11.53),
        ("./_sensord", 6.17),
        ("selfdrive.monitoring.dmonitoringd", 5.48),
        ("./boardd", 3.63),
        ("./_dmonitoringmodeld", 2.67),
        ("selfdrive.logmessaged", 2.71),
        ("selfdrive.thermald.thermald", 2.41),
        ("selfdrive.locationd.calibrationd", 6.81),
        ("./proclogd", 1.54),
        ("./_gpsd", 0.09),
        ("./clocksd", 0.02),
        ("./ubloxd", 0.02),
        ("selfdrive.tombstoned", 0),
        ("./logcatd", 0),
    )
    separator = "------------------------------------------------"
    status = 0

    # logMonoTime is presumably in nanoseconds (hence the 1e9) - TODO confirm
    elapsed = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9

    print(separator)
    for proc_name, normal_cpu_usage in expected_usage:
        label = proc_name.ljust(35)
        try:
            start = [p for p in first_proc.procLog.procs if proc_name in p.cmdline][0]
            end = [p for p in last_proc.procLog.procs if proc_name in p.cmdline][0]
        except IndexError:
            # Process missing from one of the samples: counts as a failure.
            print(f"{label} NO METRICS FOUND")
            status = 1
            continue

        cpu_usage = (cputime_total(end) - cputime_total(start)) / elapsed * 100.

        # Tolerate 10% over the reference, or 5 percentage points,
        # whichever is larger.
        limit = max(normal_cpu_usage * 1.1, normal_cpu_usage + 5.0)
        if cpu_usage > limit:
            print(f"Warning {proc_name} using more CPU than normal")
            status = 1

        print(f"{label} {cpu_usage:.2f}%")
    print(separator)

    return status
|
||||
|
|
|
@ -80,7 +80,7 @@ echo -n "1" > /data/params/d/HasCompletedSetup
|
|||
echo -n "1" > /data/params/d/CommunityFeaturesToggle
|
||||
|
||||
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" nosetests -s selfdrive/test/test_openpilot.py
|
||||
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" GET_CPU_USAGE=1 selfdrive/manager.py
|
||||
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/test/test_cpu_usage.py
|
||||
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/car/tests/test_car_interfaces.py
|
||||
|
||||
echo "[-] testing panda build T=$SECONDS"
|
||||
|
|
|
@ -320,6 +320,7 @@ selfdrive/test/__init__.py
|
|||
selfdrive/test/test_openpilot.py
|
||||
selfdrive/test/test_fingerprints.py
|
||||
selfdrive/test/test_car_models.py
|
||||
selfdrive/test/test_cpu_usage.py
|
||||
|
||||
selfdrive/ui/SConscript
|
||||
selfdrive/ui/*.cc
|
||||
|
|
|
@ -161,7 +161,6 @@ from selfdrive.loggerd.config import ROOT
|
|||
from selfdrive.launcher import launcher
|
||||
from common import android
|
||||
from common.apk import update_apks, pm_apply_packages, start_offroad
|
||||
from common.manager_helpers import print_cpu_usage
|
||||
|
||||
ThermalStatus = cereal.log.ThermalData.ThermalStatus
|
||||
|
||||
|
@ -428,9 +427,6 @@ def manager_thread():
|
|||
# now loop
|
||||
thermal_sock = messaging.sub_sock('thermal')
|
||||
|
||||
if os.getenv("GET_CPU_USAGE"):
|
||||
proc_sock = messaging.sub_sock('procLog', conflate=True)
|
||||
|
||||
cloudlog.info("manager start")
|
||||
cloudlog.info({"environ": os.environ})
|
||||
|
||||
|
@ -461,9 +457,6 @@ def manager_thread():
|
|||
|
||||
logger_dead = False
|
||||
|
||||
start_t = time.time()
|
||||
first_proc = None
|
||||
|
||||
while 1:
|
||||
msg = messaging.recv_sock(thermal_sock, wait=True)
|
||||
|
||||
|
@ -504,26 +497,6 @@ def manager_thread():
|
|||
if params.get("DoUninstall", encoding='utf8') == "1":
|
||||
break
|
||||
|
||||
if os.getenv("GET_CPU_USAGE"):
|
||||
dt = time.time() - start_t
|
||||
|
||||
# Get first sample
|
||||
if dt > 30 and first_proc is None:
|
||||
first_proc = messaging.recv_sock(proc_sock)
|
||||
|
||||
# Get last sample and exit
|
||||
if dt > 90:
|
||||
last_proc = messaging.recv_sock(proc_sock, wait=True)
|
||||
|
||||
all_running = all(running[p].is_alive() for p in car_started_processes)
|
||||
|
||||
cleanup_all_processes(None, None)
|
||||
return_code = print_cpu_usage(first_proc, last_proc)
|
||||
|
||||
if not all_running:
|
||||
return_code = 1
|
||||
sys.exit(return_code)
|
||||
|
||||
def manager_prepare(spinner=None):
|
||||
# build all processes
|
||||
os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
#!/usr/bin/env python3
|
||||
import time
|
||||
import threading
|
||||
import _thread
|
||||
import signal
|
||||
import sys
|
||||
|
||||
import cereal.messaging as messaging
|
||||
import selfdrive.manager as manager
|
||||
|
||||
|
||||
def cputime_total(ct):
    """Sum the four CPU time counters of a single process entry.

    Reads ``cpuUser``, ``cpuSystem``, ``cpuChildrenUser`` and
    ``cpuChildrenSystem`` from ``ct`` and returns their total.
    """
    total = ct.cpuUser
    total = total + ct.cpuSystem
    total = total + ct.cpuChildrenUser
    total = total + ct.cpuChildrenSystem
    return total
|
||||
|
||||
|
||||
def print_cpu_usage(first_proc, last_proc):
    """Report per-process CPU usage between two samples and flag outliers.

    Each process of the reference table is looked up in both samples (by
    membership of its name in ``cmdline``). Its CPU usage in percent over
    the elapsed time is compared against an upper and a lower bound derived
    from the reference value. The whole report is printed in one call once
    all processes have been evaluated.

    Args:
        first_proc: earlier sample; must expose ``logMonoTime`` and
            ``procLog.procs``.
        last_proc: later sample with the same attributes.

    Returns:
        0 when every listed process was found in both samples and stayed
        within its bounds, otherwise 1.
    """
    # (cmdline entry to look for, reference CPU usage in percent)
    reference = (
        ("selfdrive.controls.controlsd", 59.46),
        ("selfdrive.locationd.locationd", 34.38),
        ("./loggerd", 28.49),
        ("selfdrive.controls.plannerd", 19.77),
        ("./_modeld", 12.74),
        ("selfdrive.locationd.paramsd", 11.53),
        ("selfdrive.controls.radard", 9.54),
        ("./_ui", 9.54),
        ("./camerad", 7.07),
        ("selfdrive.locationd.calibrationd", 6.81),
        ("./_sensord", 6.17),
        ("selfdrive.monitoring.dmonitoringd", 5.48),
        ("./boardd", 3.63),
        ("./_dmonitoringmodeld", 2.67),
        ("selfdrive.logmessaged", 2.71),
        ("selfdrive.thermald.thermald", 2.41),
        ("./proclogd", 1.54),
        ("./_gpsd", 0.09),
        ("./clocksd", 0.02),
        ("./ubloxd", 0.02),
        ("selfdrive.tombstoned", 0),
        ("./logcatd", 0),
    )
    separator = "------------------------------------------------"

    def _entry(sample, name):
        # Raises IndexError when no process in the sample matches `name`.
        return [p for p in sample.procLog.procs if name in p.cmdline][0]

    failed = False
    # logMonoTime is presumably in nanoseconds (hence the 1e9) - TODO confirm
    elapsed = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9

    lines = [separator]
    for proc_name, normal_cpu_usage in reference:
        label = proc_name.ljust(35)
        try:
            before = _entry(first_proc, proc_name)
            after = _entry(last_proc, proc_name)
        except IndexError:
            # Process missing from one of the samples: counts as a failure.
            lines.append(f"{label} NO METRICS FOUND")
            failed = True
            continue

        cpu_usage = (cputime_total(after) - cputime_total(before)) / elapsed * 100.

        # Upper bound: 10% or 5 percentage points above the reference,
        # whichever is larger. Lower bound: 30% of the reference or one
        # percentage point below it (clamped at zero), whichever is smaller.
        upper = max(normal_cpu_usage * 1.1, normal_cpu_usage + 5.0)
        lower = min(normal_cpu_usage * 0.3, max(normal_cpu_usage - 1.0, 0.0))
        if cpu_usage > upper:
            lines.append(f"Warning {proc_name} using more CPU than normal")
            failed = True
        elif cpu_usage < lower:
            lines.append(f"Warning {proc_name} using less CPU than normal")
            failed = True

        lines.append(f"{label} {cpu_usage:.2f}%")
    lines.append(separator)

    # Trailing "\n" keeps the blank line the report ends with.
    print("\n".join(lines) + "\n")
    return 1 if failed else 0
|
||||
|
||||
# Exit status of the test; stays at 1 (failure) until test_thread completes
# successfully and overwrites it.
return_code = 1


def test_thread():
    """Sample procLog twice, evaluate CPU usage and stop the main thread.

    Waits 30 seconds before taking the first ``procLog`` sample, waits
    another 60 seconds and takes the second one. The result is stored in
    the module-level ``return_code``: 0 only when ``print_cpu_usage``
    returned 0 and every process in ``manager.car_started_processes`` is
    present in ``manager.get_running()`` and alive.

    The main thread is always interrupted at the end, even when sampling or
    evaluation raises. Without that, a failure in this thread would leave
    ``manager.main()`` running and the test would hang instead of failing;
    in that case ``return_code`` keeps its failing default of 1.
    """
    global return_code
    try:
        proc_sock = messaging.sub_sock('procLog', conflate=True)

        # wait until everything's started and get first sample
        time.sleep(30)
        first_proc = messaging.recv_sock(proc_sock, wait=True)

        # run for a minute and get last sample
        time.sleep(60)
        last_proc = messaging.recv_sock(proc_sock, wait=True)

        running = manager.get_running()
        all_running = all(p in running and running[p].is_alive()
                          for p in manager.car_started_processes)

        cpu_status = print_cpu_usage(first_proc, last_proc)
        return_code = cpu_status if all_running else 1
    finally:
        # Simulates SIGINT in the main thread; the handler installed in the
        # __main__ section exits with return_code.
        _thread.interrupt_main()
|
||||
|
||||
if __name__ == "__main__":

    def handle_exit(sig, frame):
        """Exit the process with the status recorded in ``return_code``."""
        sys.exit(return_code)

    # test_thread finishes by calling _thread.interrupt_main(), which is
    # delivered to the main thread as SIGINT and ends up in handle_exit.
    signal.signal(signal.SIGINT, handle_exit)

    # Run the measurement in a daemon thread while manager runs in the
    # main thread.
    worker = threading.Thread(target=test_thread, daemon=True)
    worker.start()
    manager.main()
|
Loading…
Reference in New Issue