Improve on-device CI reliability (#1922)

This commit is contained in:
Adeeb Shihadeh 2020-07-25 02:12:19 -07:00 committed by GitHub
parent e0e7c7486d
commit 90fc1c6028
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 115 additions and 169 deletions

84
Jenkinsfile vendored
View File

@ -1,3 +1,33 @@
// Run a shell snippet on a test device over SSH as a labelled Jenkins `sh` step.
//   ip         - host to connect to (root@ip, port 8022)
//   step_label - label shown for the Jenkins `sh` step
//   cmd        - shell text spliced into the remote script after the setup lines
// CI, TEST_DIR, GIT_BRANCH and GIT_COMMIT are prefixed onto the remote bash invocation.
def phone(String ip, String step_label, String cmd) {
def ci_env = "CI=1 TEST_DIR=${env.TEST_DIR} GIT_BRANCH=${env.GIT_BRANCH} GIT_COMMIT=${env.GIT_COMMIT}"
// Bind the 'id_rsa_public' file credential to key_file; it is handed to `ssh -i` below.
withCredentials([file(credentialsId: 'id_rsa_public', variable: 'key_file')]) {
// Groovy interpolates ${...} into the script text first; \$ keeps $$ and $PPID literal
// so they reach the remote shell. The heredoc delimiter is quoted ('EOF'), so the local
// shell passes the body through unexpanded.
// The remote bash runs with -l (login shell) and -e (stop at the first failing command);
// lines ending in `|| true` are therefore best-effort.
// NOTE(review): the trailing `exit 0` presumably ends the -tt session once cmd has
// succeeded - confirm.
sh label: step_label,
script: """
ssh -tt -o StrictHostKeyChecking=no -i ${key_file} -p 8022 root@${ip} '${ci_env} /usr/bin/bash -le' <<'EOF'
echo \$\$ > /dev/cpuset/app/tasks || true
echo \$PPID > /dev/cpuset/app/tasks || true
mkdir -p /dev/shm
chmod 777 /dev/shm
cd ${env.TEST_DIR} || true
${cmd}
exit 0
EOF"""
}
}
// Reserve one device and run a list of CI steps on it through phone().
//   device_type - label passed to lock(); one matching resource is reserved (quantity: 1)
//   steps       - list of [step label, shell command] pairs, executed in order
// NOTE(review): the reserved resource is read back as `device_ip`, so the lockable
// resource names are presumably the devices' IP addresses - confirm.
def phone_steps(String device_type, steps) {
lock(resource: "", label: device_type, inversePrecedence: true, variable: 'device_ip', quantity: 1) {
// One 60 minute budget covers cleanup, checkout and every step together.
timeout(time: 60, unit: 'MINUTES') {
// `|| true` keeps this step green whatever pkill returns.
phone(device_ip, "kill old processes", "pkill -f comma || true")
// The checkout script is read from the Jenkins workspace and sent as the command text.
phone(device_ip, "git checkout", readFile("selfdrive/test/setup_device_ci.sh"),)
steps.each { item ->
phone(device_ip, item[0], item[1])
}
}
}
}
pipeline {
agent {
docker {
@ -7,6 +37,7 @@ pipeline {
}
environment {
COMMA_JWT = credentials('athena-test-jwt')
TEST_DIR = "/data/openpilot"
}
stages {
@ -16,14 +47,9 @@ pipeline {
branch 'devel-staging'
}
steps {
lock(resource: "", label: 'eon-build', inversePrecedence: true, variable: 'eon_ip', quantity: 1){
timeout(time: 60, unit: 'MINUTES') {
dir(path: 'selfdrive/test') {
sh 'pip install paramiko'
sh 'python phone_ci.py "cd release && PUSH=1 ./build_release2.sh"'
}
}
}
phone_steps("eon-build", [
["build release2-staging and dashcam-staging", "cd release && PUSH=1 ./build_release2.sh"],
])
}
}
@ -42,48 +68,36 @@ pipeline {
environment {
CI_PUSH = "${env.BRANCH_NAME == 'master' ? 'master-ci' : ' '}"
}
steps {
lock(resource: "", label: 'eon', inversePrecedence: true, variable: 'eon_ip', quantity: 1){
timeout(time: 60, unit: 'MINUTES') {
dir(path: 'selfdrive/test') {
sh 'pip install paramiko'
sh 'python phone_ci.py "cd release && ./build_devel.sh"'
}
}
}
phone_steps("eon", [
["build devel", "cd release && CI_PUSH=${env.CI_PUSH} ./build_devel.sh"],
["test openpilot", "nosetests -s selfdrive/test/test_openpilot.py"],
//["test cpu usage", "cd selfdrive/test/ && ./test_cpu_usage.py"],
["test car interfaces", "cd selfdrive/car/tests/ && ./test_car_interfaces.py"],
])
}
}
stage('Replay Tests') {
steps {
lock(resource: "", label: 'eon2', inversePrecedence: true, variable: 'eon_ip', quantity: 1){
timeout(time: 60, unit: 'MINUTES') {
dir(path: 'selfdrive/test') {
sh 'pip install paramiko'
sh 'python phone_ci.py "cd selfdrive/test/process_replay && ./camera_replay.py"'
}
}
}
phone_steps("eon2", [
["camerad/modeld replay", "cd selfdrive/test/process_replay && ./camera_replay.py"],
])
}
}
stage('HW Tests') {
steps {
lock(resource: "", label: 'eon', inversePrecedence: true, variable: 'eon_ip', quantity: 1){
timeout(time: 60, unit: 'MINUTES') {
dir(path: 'selfdrive/test') {
sh 'pip install paramiko'
sh 'python phone_ci.py "SCONS_CACHE=1 scons -j3 cereal/ && \
nosetests -s selfdrive/test/test_sounds.py && \
nosetests -s selfdrive/boardd/tests/test_boardd_loopback.py"'
}
}
}
phone_steps("eon", [
["build cereal", "SCONS_CACHE=1 scons -j4 cereal/"],
["test sounds", "nosetests -s selfdrive/test/test_sounds.py"],
["test boardd loopback", "nosetests -s selfdrive/boardd/tests/test_boardd_loopback.py"],
])
}
}
}
}
}
}

View File

@ -1,13 +1,4 @@
#!/usr/bin/env bash
set -e
mkdir -p /dev/shm
chmod 777 /dev/shm
# Write cpuset
echo $$ > /dev/cpuset/app/tasks
echo $PPID > /dev/cpuset/app/tasks
#!/usr/bin/bash -e
SOURCE_DIR=/data/openpilot_source
TARGET_DIR=/data/openpilot
@ -18,7 +9,7 @@ export GIT_COMMITTER_NAME="Vehicle Researcher"
export GIT_COMMITTER_EMAIL="user@comma.ai"
export GIT_AUTHOR_NAME="Vehicle Researcher"
export GIT_AUTHOR_EMAIL="user@comma.ai"
export GIT_SSH_COMMAND="ssh -i /tmp/deploy_key"
export GIT_SSH_COMMAND="ssh -i /data/gitkey"
echo "[-] Setting up repo T=$SECONDS"
if [ ! -d "$TARGET_DIR" ]; then
@ -73,16 +64,6 @@ git commit -a -m "openpilot v$VERSION release"
# Run build
SCONS_CACHE=1 scons -j3
echo "[-] testing openpilot T=$SECONDS"
echo -n "0" > /data/params/d/Passive
echo -n "0.2.0" > /data/params/d/CompletedTrainingVersion
echo -n "1" > /data/params/d/HasCompletedSetup
echo -n "1" > /data/params/d/CommunityFeaturesToggle
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" nosetests -s selfdrive/test/test_openpilot.py
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/test/test_cpu_usage.py
PYTHONPATH="$TARGET_DIR:$TARGET_DIR/pyextra" selfdrive/car/tests/test_car_interfaces.py
echo "[-] testing panda build T=$SECONDS"
pushd panda/board/
make bin
@ -99,10 +80,4 @@ if [ ! -z "$CI_PUSH" ]; then
git push -f origin master-ci:$CI_PUSH
fi
echo "[-] done pushing T=$SECONDS"
# reset version
cd $SOURCE_DIR
git checkout -- selfdrive/common/version.h
echo "[-] done T=$SECONDS"

View File

@ -312,9 +312,8 @@ selfdrive/thermald/thermald.py
selfdrive/thermald/power_monitoring.py
selfdrive/test/__init__.py
selfdrive/test/id_rsa
selfdrive/test/helpers.py
selfdrive/test/phone_ci.py
selfdrive/test/setup_device_ci.sh
selfdrive/test/test_openpilot.py
selfdrive/test/test_fingerprints.py
selfdrive/test/test_cpu_usage.py

View File

@ -4,8 +4,19 @@ from nose.tools import nottest
from common.android import ANDROID
from common.apk import update_apks, start_offroad, pm_apply_packages, android_packages
from common.params import Params
from selfdrive.version import training_version
from selfdrive.manager import start_managed_process, kill_managed_process, get_running
def set_params_enabled():
    """Write the params that tests need for an enabled, non-passive setup.

    Stores "1" for HasAcceptedTerms, HasCompletedSetup, OpenpilotEnabledToggle
    and CommunityFeaturesToggle, "0" for Passive, and the current
    ``training_version`` for CompletedTrainingVersion.
    """
    store = Params()
    # Kept as an ordered sequence so the writes happen in a fixed order.
    settings = (
        ("HasAcceptedTerms", "1"),
        ("HasCompletedSetup", "1"),
        ("OpenpilotEnabledToggle", "1"),
        ("CommunityFeaturesToggle", "1"),
        ("Passive", "0"),
        ("CompletedTrainingVersion", training_version),
    )
    for key, value in settings:
        store.put(key, value)
def phone_only(x):
if ANDROID:
return x

View File

@ -1,98 +0,0 @@
#!/usr/bin/env python3
import paramiko # pylint: disable=import-error
import os
import sys
import re
import time
import socket
SOURCE_DIR = "/data/openpilot_source/"
TEST_DIR = "/data/openpilot/"
def run_on_phone(test_cmd):
    """Run ``test_cmd`` on the test phone over SSH and exit with its status.

    The phone address is read from the ``eon_ip`` environment variable and
    the private key from ``id_rsa`` next to this file. The source checkout
    on the phone is reset to ``GIT_COMMIT`` (falling back to ``GIT_BRANCH``),
    synced into ``TEST_DIR``, and then ``test_cmd`` is executed in an
    interactive shell. All remote output is echoed to stdout.

    Args:
        test_cmd: Shell command line to execute on the phone.

    Raises:
        Exception: If ``eon_ip`` is not set.
        KeyError: If ``GIT_BRANCH`` is not set.
        paramiko.ssh_exception.SSHException, socket.timeout: If the phone
            cannot be reached within about one minute.
        SystemExit: Always on completion; the code is the remote command's
            status, or 1 if the connection timed out or no ``RESULT:`` line
            was received.
    """
    import shlex  # function-scope import: only needed for quoting below

    eon_ip = os.environ.get('eon_ip', None)
    if eon_ip is None:
        raise Exception("'eon_ip' not set")

    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    # Close the key file as soon as the key has been parsed.
    key_path = os.path.join(os.path.dirname(__file__), "id_rsa")
    with open(key_path) as key_file:
        key = paramiko.RSAKey.from_private_key(key_file)

    print("SSH to phone at {}".format(eon_ip))

    try:
        # try connecting for one minute
        t_start = time.time()
        while True:
            try:
                ssh.connect(hostname=eon_ip, port=8022, pkey=key, timeout=10)
            except (paramiko.ssh_exception.SSHException, socket.timeout,
                    paramiko.ssh_exception.NoValidConnectionsError):
                print("Connection failed")
                if time.time() - t_start > 60:
                    raise
            else:
                break
            time.sleep(1)

        branch = os.environ['GIT_BRANCH']
        commit = os.environ.get('GIT_COMMIT', branch)
        # Quote once so an unusual ref name cannot break the shell lines.
        quoted_commit = shlex.quote(commit)

        conn = ssh.invoke_shell()

        # pass in all environment variables prefixed with 'CI_'
        for k, v in os.environ.items():
            if k.startswith("CI_") or k in ["GIT_BRANCH", "GIT_COMMIT"]:
                # shlex.quote also copes with values containing a single quote
                conn.send(f"export {k}={shlex.quote(v)}\n")
        conn.send("export CI=1\n")

        # clear scons cache dirs that haven't been written to in one day
        # (-mtime +0 means "at least 24 hours old"; -mtime 1 would skip
        # anything older than 48 hours)
        conn.send("cd /tmp && find . -maxdepth 1 -type d -name 'scons_cache_*' "
                  "-mtime +0 -exec rm -rf '{}' \\;\n")

        # set up environment
        conn.send(f"cd {SOURCE_DIR}\n")
        conn.send("git reset --hard\n")
        conn.send("git fetch origin\n")
        conn.send("find . -maxdepth 1 -not -path './.git' -not -name '.' -not -name '..' -exec rm -rf '{}' \\;\n")
        conn.send(f"git reset --hard {quoted_commit}\n")
        conn.send(f"git checkout {quoted_commit}\n")
        conn.send("git clean -xdf\n")
        conn.send("git submodule update --init\n")
        conn.send("git submodule foreach --recursive git reset --hard\n")
        conn.send("git submodule foreach --recursive git clean -xdf\n")
        conn.send('echo "git took $SECONDS seconds"\n')

        conn.send(f"rsync -a --delete {SOURCE_DIR} {TEST_DIR}\n")

        # run the test
        conn.send(test_cmd + "\n")

        # get the result and print it back out
        conn.send('echo "RESULT:" $?\n')
        conn.send("exit\n")

        # Only the final 1024 bytes are searched for the RESULT line, so keep
        # just that tail instead of buffering the whole session.
        tail = b""
        conn.settimeout(240)

        while True:
            try:
                recvd = conn.recv(4096)
            except socket.timeout:
                print("connection to phone timed out")
                sys.exit(1)

            if len(recvd) == 0:
                break

            tail = (tail + recvd)[-1024:]
            sys.stdout.buffer.write(recvd)
            sys.stdout.flush()

        results = re.findall(rb'^RESULT: (\d+)', tail, flags=re.MULTILINE)
        if not results:
            # The shell ended before reporting a status; fail explicitly
            # rather than crash with an IndexError.
            print("no RESULT line received from phone")
            sys.exit(1)
        sys.exit(int(results[0]))
    finally:
        # Runs on every exit path, including the sys.exit() calls above.
        ssh.close()


if __name__ == "__main__":
    run_on_phone(sys.argv[1])

View File

@ -0,0 +1,35 @@
#!/usr/bin/bash -e
# Bring the device test checkout to the commit under test.
#
# Required environment:
#   GIT_COMMIT - commit (or ref) to check out in the source checkout
#   TEST_DIR   - directory that receives a copy of the source checkout
#
# The source checkout is reset, fetched, wiped of untracked files and
# submodule changes, then mirrored into TEST_DIR with rsync.

export SOURCE_DIR="/data/openpilot_source/"

if [ -z "$GIT_COMMIT" ]; then
  echo "GIT_COMMIT must be set"
  exit 1
fi

if [ -z "$TEST_DIR" ]; then
  echo "TEST_DIR must be set"
  exit 1
fi

# TODO: never clear qcom_replay cache
# clear scons cache dirs that haven't been written to in one day
# (-mtime +0 matches anything at least 24 hours old; -mtime 1 would skip
# directories older than 48 hours and leave them behind forever)
cd /tmp && find . -maxdepth 1 -type d -name 'scons_cache_*' -mtime +0 -exec rm -rf '{}' \;

# set up environment
cd "$SOURCE_DIR"
git reset --hard
git fetch origin
# remove everything except .git so files from a previous run cannot linger
find . -maxdepth 1 -not -path './.git' -not -name '.' -not -name '..' -exec rm -rf '{}' \;
git reset --hard "$GIT_COMMIT"
git checkout "$GIT_COMMIT"
git clean -xdf
git submodule update --init
git submodule foreach --recursive git reset --hard
git submodule foreach --recursive git clean -xdf
echo "git checkout took $SECONDS seconds"

# SOURCE_DIR ends with a slash, so rsync copies its contents into TEST_DIR
rsync -a --delete "$SOURCE_DIR" "$TEST_DIR"

echo "$TEST_DIR synced with $GIT_COMMIT, took $SECONDS seconds"

View File

@ -6,8 +6,9 @@ import signal
import sys
import cereal.messaging as messaging
from common.params import Params
import selfdrive.manager as manager
from selfdrive.test.helpers import set_params_enabled
def cputime_total(ct):
    """Return the sum of the four CPU time fields of ``ct``.

    Adds ``cpuUser``, ``cpuSystem``, ``cpuChildrenUser`` and
    ``cpuChildrenSystem`` in that order.
    """
    total = ct.cpuUser
    total = total + ct.cpuSystem
    total = total + ct.cpuChildrenUser
    total = total + ct.cpuChildrenSystem
    return total
@ -70,13 +71,18 @@ return_code = 1
def test_thread():
try:
global return_code
proc_sock = messaging.sub_sock('procLog', conflate=True, timeout=1000)
proc_sock = messaging.sub_sock('procLog', conflate=True, timeout=2000)
# wait until everything's started and get first sample
time.sleep(30)
start_time = time.monotonic()
while time.monotonic() - start_time < 120:
if Params().get("CarParams") is not None:
break
time.sleep(2)
first_proc = messaging.recv_sock(proc_sock, wait=True)
if first_proc is None or not all_running():
print("\n\nTEST FAILED: all car started processes not running\n\n")
err_msg = "procLog recv timed out" if first_proc is None else "all car started process not running"
print(f"\n\nTEST FAILED: {err_msg}\n\n")
raise Exception
# run for a minute and get last sample
@ -90,12 +96,16 @@ def test_thread():
if __name__ == "__main__":
# setup signal handler to exit with test status
def handle_exit(sig, frame):
sys.exit(return_code)
signal.signal(signal.SIGINT, handle_exit)
# start manager and test thread
set_params_enabled()
Params().delete("CarParams")
t = threading.Thread(target=test_thread)
t.daemon = True
t.start()

View File

@ -5,7 +5,7 @@ os.environ['FAKEUPLOAD'] = "1"
from common.params import Params
from common.realtime import sec_since_boot
from selfdrive.manager import manager_init, manager_prepare, start_daemon_process
from selfdrive.test.helpers import phone_only, with_processes
from selfdrive.test.helpers import phone_only, with_processes, set_params_enabled
import json
import requests
import signal
@ -16,6 +16,7 @@ import time
# must run first
@phone_only
def test_manager_prepare():
# Write the params from selfdrive.test.helpers.set_params_enabled() first, so they
# are already in place when manager_init() and manager_prepare() run.
set_params_enabled()
manager_init()
manager_prepare()

View File

@ -1 +0,0 @@
../../../selfdrive/test/id_rsa