mirror of https://github.com/commaai/openpilot.git
exec DM model with gpu (#33397)
* half old-commit-hash: 9f72eca003d4637ca7fe847ebaf925c694fc2e84
* optimed old-commit-hash: 6e36e2a12e09275ec21d1590012a92b05ca52ff5
* thneed old-commit-hash: 419a06c09c0c767d828bcd1e118bc575898c343a
* exec old-commit-hash: 0059c27ec11b076a37f65d604ed135ea6541b1a6
* runner old-commit-hash: 34232ada94450ce541eaef546197fa219810a891
* runs but old-commit-hash: 3db37c00b6a64908293b4de8d8b56e80308cd8f2
* it is 01 old-commit-hash: a160d81eb1a7e77abbef959b44f602610f68f665
* np old-commit-hash: c1caff6ba648cc2c0094c71b2ea074f01c3c2dc8
* module url old-commit-hash: 6f4902c4d384263a53e2c1d14d93b5ff864b6a5f
* new old-commit-hash: 779ae79b1bc3df6374fb6663ac8592e107a6e504
* ds fast
* is this work
* corcention
* real timing
* no reg
* interim gather
* 0e4a9c7b
* fa69be01, and halve
* list
* cleanup
* slighly faster
* setprotlt
* expected
* replay ref
* more powar
* reluctantly
* bump tg
* 8
* less
* less
* bump tg
* better than exp
* closer
* cc
* see diff
* commits
* was right
* to 32 cast
* remove dlc file
* support both
---------
Co-authored-by: Comma Device <device@comma.ai>
This commit is contained in:
parent bdb4b87fa5
commit f79aca8e1e
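The net effect of this change is that the driver monitoring model is compiled to a thneed blob and executed on the GPU through an OpenCL context shared with VisionIPC, replacing the quantized SNPE/DSP path. A minimal sketch of the new setup, condensed from the dmonitoringmodeld.py hunks below; the constants and calls are copied from that diff, and the snippet only makes sense inside an openpilot checkout with the GPU runner built:

from pathlib import Path
import numpy as np
from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext

FEATURE_LEN = 512
OUTPUT_SIZE = 84 + FEATURE_LEN                       # 84 scalars plus the new feature vector
MODEL_PATHS = {ModelRunner.THNEED: Path('models/dmonitoring_model.thneed'),
               ModelRunner.ONNX: Path('models/dmonitoring_model.onnx')}

cl_context = CLContext()                             # one OpenCL context, also handed to VisionIpcClient
output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
model = ModelRunner(MODEL_PATHS, output, Runtime.GPU, False, cl_context)  # was Runtime.DSP, True, None
model.addInput("input_img", None)
model.addInput("calib", np.zeros(3, dtype=np.float32))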
@@ -2,7 +2,6 @@
 # to move existing files into LFS:
 # git add --renormalize .
-*.dlc filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.svg filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text

@@ -69,6 +69,10 @@ if arch == "larch64" or GetOption('pc_thneed'):
 lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd)
 
+fn_dm = File("models/dmonitoring_model").abspath
+cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn_dm}.onnx {fn_dm}.thneed"
+lenv.Command(fn_dm + ".thneed", [fn_dm + ".onnx"] + tinygrad_files, cmd)
+
 thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'OpenCL', 'dl'])
 thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc'])
 lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'OpenCL'])

@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
+cd "$DIR/../../"
+
+if [ -f "$DIR/libthneed.so" ]; then
+  export LD_PRELOAD="$DIR/libthneed.so"
+fi
+
+exec "$DIR/dmonitoringmodeld.py" "$@"

@@ -6,6 +6,7 @@ import time
 import ctypes
 import numpy as np
 from pathlib import Path
+from setproctitle import setproctitle
 
 from cereal import messaging
 from cereal.messaging import PubMaster, SubMaster

@@ -14,16 +15,18 @@ from openpilot.common.swaglog import cloudlog
 from openpilot.common.params import Params
 from openpilot.common.realtime import set_realtime_priority
 from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
-from openpilot.selfdrive.modeld.models.commonmodel_pyx import sigmoid
+from openpilot.selfdrive.modeld.models.commonmodel_pyx import sigmoid, CLContext
 
 CALIB_LEN = 3
 REG_SCALE = 0.25
 MODEL_WIDTH = 1440
 MODEL_HEIGHT = 960
-OUTPUT_SIZE = 84
+FEATURE_LEN = 512
+OUTPUT_SIZE = 84 + FEATURE_LEN
 
+PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
 MODEL_PATHS = {
-  ModelRunner.SNPE: Path(__file__).parent / 'models/dmonitoring_model_q.dlc',
+  ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed',
   ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx'}
 
 class DriverStateResult(ctypes.Structure):

@@ -49,21 +52,22 @@ class DMonitoringModelResult(ctypes.Structure):
     ("driver_state_lhd", DriverStateResult),
     ("driver_state_rhd", DriverStateResult),
     ("poor_vision_prob", ctypes.c_float),
-    ("wheel_on_right_prob", ctypes.c_float)]
+    ("wheel_on_right_prob", ctypes.c_float),
+    ("features", ctypes.c_float*FEATURE_LEN)]
 
 class ModelState:
   inputs: dict[str, np.ndarray]
   output: np.ndarray
   model: ModelRunner
 
-  def __init__(self):
+  def __init__(self, cl_ctx):
     assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
     self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
     self.inputs = {
       'input_img': np.zeros(MODEL_HEIGHT * MODEL_WIDTH, dtype=np.uint8),
       'calib': np.zeros(CALIB_LEN, dtype=np.float32)}
 
-    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.DSP, True, None)
+    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx)
     self.model.addInput("input_img", None)
     self.model.addInput("calib", self.inputs['calib'])

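The sizeof assert above is what lets the runner's flat float32 output buffer be reinterpreted as the ctypes result struct, now OUTPUT_SIZE = 84 + FEATURE_LEN floats long. A toy illustration of that idea; ToyResult is a hypothetical stand-in for DMonitoringModelResult, whose real fields are the ones listed in the hunk:

import ctypes
import numpy as np

FEATURE_LEN = 512
OUTPUT_SIZE = 84 + FEATURE_LEN

class ToyResult(ctypes.Structure):           # hypothetical stand-in for DMonitoringModelResult
  _fields_ = [("scalars", ctypes.c_float * 84),
              ("features", ctypes.c_float * FEATURE_LEN)]

output = np.zeros(OUTPUT_SIZE, dtype=np.float32)   # buffer the model runner writes into
assert ctypes.sizeof(ToyResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
result = ToyResult.from_buffer(output)             # reinterpret the floats as the struct, no copy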
@@ -76,17 +80,17 @@ class ModelState:
     input_data = self.inputs['input_img'].reshape(MODEL_HEIGHT, MODEL_WIDTH)
     input_data[:] = buf_data[v_offset:v_offset+MODEL_HEIGHT, h_offset:h_offset+MODEL_WIDTH]
 
-    t1 = time.perf_counter()
     self.model.setInputBuffer("input_img", self.inputs['input_img'].view(np.float32))
+    t1 = time.perf_counter()
     self.model.execute()
     t2 = time.perf_counter()
     return self.output, t2 - t1
 
 
 def fill_driver_state(msg, ds_result: DriverStateResult):
-  msg.faceOrientation = [x * REG_SCALE for x in ds_result.face_orientation]
+  msg.faceOrientation = list(ds_result.face_orientation)
   msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std]
-  msg.facePosition = [x * REG_SCALE for x in ds_result.face_position[:2]]
+  msg.facePosition = list(ds_result.face_position[:2])
   msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]]
   msg.faceProb = sigmoid(ds_result.face_prob)
   msg.leftEyeProb = sigmoid(ds_result.left_eye_prob)

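Two small things worth noting here: sigmoid from commonmodel_pyx is the ordinary logistic function, and the new model apparently emits final-scale regression values, so the old REG_SCALE = 0.25 multiplier is dropped. A toy version for intuition, not the repo's implementation:

import math

def sigmoid(x: float) -> float:
  # squashes a raw logit into (0, 1), as used for the face/eye probabilities above
  return 1. / (1. + math.exp(-x))

# before: msg.faceOrientation = [x * REG_SCALE for x in raw]   # REG_SCALE = 0.25
# after:  msg.faceOrientation = list(raw)                      # model output used directly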
@@ -115,14 +119,16 @@ def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts:
 
 def main():
   gc.disable()
+  setproctitle(PROCESS_NAME)
   set_realtime_priority(1)
 
-  model = ModelState()
+  cl_context = CLContext()
+  model = ModelState(cl_context)
   cloudlog.warning("models loaded, dmonitoringmodeld starting")
   Params().put_bool("DmModelInitialized", True)
 
   cloudlog.warning("connecting to driver stream")
-  vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True)
+  vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context)
   while not vipc_client.connect(False):
     time.sleep(0.1)
   assert vipc_client.is_connected()

@@ -1,2 +1,2 @@
-5ec97a39-0095-4cea-adfa-6d72b1966cc1
-26cac7a9757a27c783a365403040a1bd27ccdaea
+fa69be01-b430-4504-9d72-7dcb058eb6dd
+d9fb22d1c4fa3ca3d201dbc8edf1d0f0918e53e6

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dd3982940d823c4fbb0429b733a0b78b0688d7d67aa76ff7b754a3e2f3d8683
-size 16132780
+oid sha256:50efe6451a3fb3fa04b6bb0e846544533329bd46ecefe9e657e91214dee2aaeb
+size 7196502

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c26f13816b143f5bb29ac2980f8557bd5687a75729e4d895313fb9a5a1f0f46
-size 4488449

@@ -14,8 +14,12 @@ def attributeproto_fp16_to_fp32(attr):
   attr.data_type = 1
   attr.raw_data = float32_list.astype(np.float32).tobytes()
 
-def convert_fp16_to_fp32(path):
-  model = onnx.load(path)
+def convert_fp16_to_fp32(onnx_path_or_bytes):
+  if isinstance(onnx_path_or_bytes, bytes):
+    model = onnx.load_from_string(onnx_path_or_bytes)
+  elif isinstance(onnx_path_or_bytes, str):
+    model = onnx.load(onnx_path_or_bytes)
+
   for i in model.graph.initializer:
     if i.data_type == 10:
       attributeproto_fp16_to_fp32(i)

@@ -23,6 +27,8 @@ def convert_fp16_to_fp32(path):
     if i.type.tensor_type.elem_type == 10:
       i.type.tensor_type.elem_type = 1
   for i in model.graph.node:
+    if i.op_type == 'Cast' and i.attribute[0].i == 10:
+      i.attribute[0].i = 1
     for a in i.attribute:
       if hasattr(a, 't'):
         if a.t.data_type == 10:

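The bare 10s and 1s in this converter are ONNX TensorProto data-type enums; the new Cast handling rewrites fp16 Cast targets to fp32 as well. A quick sanity check, assuming the onnx package is installed:

from onnx import TensorProto

assert TensorProto.FLOAT16 == 10   # the data_type / elem_type being replaced above
assert TensorProto.FLOAT == 1      # the fp32 type it is rewritten to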
@@ -61,7 +67,6 @@ class ONNXModel(RunModel):
   def __init__(self, path, output, runtime, use_tf8, cl_context):
     self.inputs = {}
     self.output = output
-    self.use_tf8 = use_tf8
 
     self.session = create_ort_session(path, fp16_to_fp32=True)
     self.input_names = [x.name for x in self.session.get_inputs()]

@@ -85,7 +90,7 @@ class ONNXModel(RunModel):
     return None
 
   def execute(self):
-    inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k,v in self.inputs.items()}
+    inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()}
     inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
     outputs = self.session.run(None, inputs)
     assert len(outputs) == 1, "Only single model outputs are supported"

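The rewritten execute() now view()s each raw input buffer to its declared dtype and then reshape()/astype()s it for ONNX Runtime, instead of special-casing a uint8 'input_img' divided by 255. The view/astype distinction in plain numpy, independent of the runner:

import numpy as np

buf = np.zeros(8, dtype=np.uint8)      # raw bytes as they sit in an input buffer
as_f16 = buf.view(np.float16)          # view(): same memory reinterpreted, now 4 half-precision values
as_f32 = as_f16.astype(np.float32)     # astype(): value-converting copy to the dtype the session expects
assert as_f16.nbytes == buf.nbytes and as_f32.dtype == np.float32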
@@ -33,8 +33,8 @@ class DRIVER_MONITOR_SETTINGS:
     self._SG_THRESHOLD = 0.9
     self._BLINK_THRESHOLD = 0.865
 
-    self._EE_THRESH11 = 0.25
-    self._EE_THRESH12 = 7.5
+    self._EE_THRESH11 = 0.4
+    self._EE_THRESH12 = 15.0
     self._EE_MAX_OFFSET1 = 0.06
     self._EE_MIN_OFFSET1 = 0.025
     self._EE_THRESH21 = 0.01

@@ -1 +1 @@
-32fe8cf4a0daa8d10a689c9ae2e51a879151c87c
+91d1089681f427a3916b42984d5df04eb94a0b90

@@ -32,6 +32,7 @@ CPU usage budget
 * total CPU usage of openpilot (sum(PROCS.values())
   should not exceed MAX_TOTAL_CPU
 """
 
+MAX_TOTAL_CPU = 265.  # total for all 8 cores
 PROCS = {
   # Baseline CPU usage by process

@@ -312,7 +313,7 @@ class TestOnroad:
     assert max(mems) - min(mems) <= 3.0
 
   def test_gpu_usage(self):
-    assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld"}
+    assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld", "selfdrive.modeld.dmonitoringmodeld"}
 
   def test_camera_processing_time(self):
     result = "\n"

@@ -34,7 +34,7 @@ class Proc:
 PROCS = [
   Proc(['camerad'], 2.1, msgs=['roadCameraState', 'wideRoadCameraState', 'driverCameraState']),
   Proc(['modeld'], 1.12, atol=0.2, msgs=['modelV2']),
-  Proc(['dmonitoringmodeld'], 0.4, msgs=['driverStateV2']),
+  Proc(['dmonitoringmodeld'], 0.5, msgs=['driverStateV2']),
   Proc(['encoderd'], 0.23, msgs=[]),
 ]
 

@@ -63,7 +63,7 @@ procs = [
   PythonProcess("micd", "system.micd", iscar),
   PythonProcess("timed", "system.timed", always_run, enabled=not PC),
 
-  PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(not PC or WEBCAM)),
+  NativeProcess("dmonitoringmodeld", "selfdrive/modeld", ["./dmonitoringmodeld"], driverview, enabled=(not PC or WEBCAM)),
   NativeProcess("encoderd", "system/loggerd", ["./encoderd"], only_onroad),
   NativeProcess("stream_encoderd", "system/loggerd", ["./encoderd", "--stream"], notcar),
   NativeProcess("loggerd", "system/loggerd", ["./loggerd"], logging),

@@ -1 +1 @@
-Subproject commit f51aa0fc7cdbac710e640172db280cfb747d2718
+Subproject commit 3e15fa0daefae75e2ddef98f82be5b5d37820631