Track CPU usage in CI (#1168)

* run openpilot for 60 seconds and report cpu

* Not passive

* Set training version

* Write cpuset

* Add community toggle

* Formatting

* Refactor

* Reorder processes

* Add max cpu

* Try this

* Try this to exit

* Does this work?

* This should work

* now it should work again

old-commit-hash: 3d2696c60b
This commit is contained in:
Willem Melching 2020-03-02 10:40:48 -08:00 committed by GitHub
parent 5a125fc7d6
commit 1076fb3da2
4 changed files with 91 additions and 2 deletions

50
common/manager_helpers.py Normal file
View File

@ -0,0 +1,50 @@
def cputime_total(ct):
return ct.cpuUser + ct.cpuSystem + ct.cpuChildrenUser + ct.cpuChildrenSystem
def print_cpu_usage(first_proc, last_proc):
r = 0
procs = [
("selfdrive.controls.controlsd", 59.46),
("./_modeld", 48.94),
("./loggerd", 28.49),
("selfdrive.controls.plannerd", 19.77),
("selfdrive.controls.radard", 9.54),
("./_ui", 9.54),
("./camerad", 7.07),
("selfdrive.locationd.locationd", 7.13),
("./_sensord", 6.17),
("selfdrive.controls.dmonitoringd", 5.48),
("./boardd", 3.63),
("./_dmonitoringmodeld", 2.67),
("selfdrive.logmessaged", 2.71),
("selfdrive.thermald", 2.41),
("./paramsd", 2.18),
("selfdrive.locationd.calibrationd", 1.76),
("./proclogd", 1.54),
("./_gpsd", 0.09),
("./clocksd", 0.02),
("./ubloxd", 0.02),
("selfdrive.tombstoned", 0),
("./logcatd", 0),
("selfdrive.updated", 0),
]
dt = (last_proc.logMonoTime - first_proc.logMonoTime) / 1e9
print("------------------------------------------------")
for proc_name, normal_cpu_usage in procs:
try:
first = [p for p in first_proc.procLog.procs if proc_name in p.cmdline][0]
last = [p for p in last_proc.procLog.procs if proc_name in p.cmdline][0]
cpu_time = cputime_total(last) - cputime_total(first)
cpu_usage = cpu_time / dt * 100.
if cpu_usage > max(normal_cpu_usage * 1.1, normal_cpu_usage + 5.0):
print(f"Warning {proc_name} using more CPU than normal")
r = 1
print(f"{proc_name.ljust(35)} {cpu_usage:.2f}%")
except IndexError:
print(f"{proc_name.ljust(35)} NO METRICS FOUND")
print("------------------------------------------------")
return r

View File

@ -1,8 +1,13 @@
#!/data/data/com.termux/files/usr/bin/bash -e
#!/usr/bin/env bash
set -e
mkdir -p /dev/shm
chmod 777 /dev/shm
# Write cpuset
echo $$ > /dev/cpuset/app/tasks
echo $PPID > /dev/cpuset/app/tasks
add_subtree() {
echo "[-] adding $2 subtree T=$SECONDS"
@ -99,7 +104,13 @@ git commit -a -m "openpilot v$VERSION release"
SCONS_CACHE=1 scons -j3
echo "[-] testing openpilot T=$SECONDS"
echo -n "0" > /data/params/d/Passive
echo -n "0.2.0" > /data/params/d/CompletedTrainingVersion
echo -n "1" > /data/params/d/HasCompletedSetup
echo -n "1" > /data/params/d/CommunityFeaturesToggle
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" nosetests -s selfdrive/test/test_openpilot.py
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" GET_CPU_USAGE=1 selfdrive/manager.py
echo "[-] testing panda build T=$SECONDS"
pushd panda/board/

View File

@ -34,6 +34,7 @@ common/cython_hacks.py
common/apk.py
common/SConscript
common/common_pyx_setup.py
common/manager_helpers.py
common/kalman/.gitignore
common/kalman/*

View File

@ -56,7 +56,10 @@ def unblock_stdout():
except (OSError, IOError, UnicodeDecodeError):
pass
os._exit(os.wait()[1])
# os.wait() returns a tuple with the pid and a 16 bit value
# whose low byte is the signal number and whose high byte is the exit satus
exit_status = os.wait()[1] >> 8
os._exit(exit_status)
if __name__ == "__main__":
@ -127,6 +130,7 @@ from selfdrive.loggerd.config import ROOT
from selfdrive.launcher import launcher
from common import android
from common.apk import update_apks, pm_apply_packages, start_frame
from common.manager_helpers import print_cpu_usage
ThermalStatus = cereal.log.ThermalData.ThermalStatus
@ -383,6 +387,9 @@ def manager_thread():
# now loop
thermal_sock = messaging.sub_sock('thermal')
if os.getenv("GET_CPU_USAGE"):
proc_sock = messaging.sub_sock('procLog', conflate=True)
cloudlog.info("manager start")
cloudlog.info({"environ": os.environ})
@ -413,6 +420,9 @@ def manager_thread():
logger_dead = False
start_t = time.time()
first_proc = None
while 1:
msg = messaging.recv_sock(thermal_sock, wait=True)
@ -448,6 +458,21 @@ def manager_thread():
if params.get("DoUninstall", encoding='utf8') == "1":
break
if os.getenv("GET_CPU_USAGE"):
dt = time.time() - start_t
# Get first sample
if dt > 30 and first_proc is None:
first_proc = messaging.recv_sock(proc_sock)
# Get last sample and exit
if dt > 90:
first_proc = first_proc
last_proc = messaging.recv_sock(proc_sock, wait=True)
cleanup_all_processes(None, None)
sys.exit(print_cpu_usage(first_proc, last_proc))
def manager_prepare(spinner=None):
# build all processes
os.chdir(os.path.dirname(os.path.abspath(__file__)))
@ -524,6 +549,8 @@ def main():
try:
manager_thread()
except SystemExit:
raise
except Exception:
traceback.print_exc()
crash.capture_exception()