Model: Sync modeld with upstream arbitrary vision inputs (#1004)

* Prepare sunnypilot modeld refactor.

* This is needed to work with latest vision models from comma master.

* Add this for dev-c3-new
This commit is contained in:
James Vecellio-Grant
2025-06-19 08:24:20 -06:00
committed by GitHub
parent 6b9c63acbe
commit d62c3cdef9
6 changed files with 36 additions and 29 deletions

View File

@@ -13,7 +13,6 @@ common_src = [
"transforms/transform.cc",
]
# OpenCL is a framework on Mac
if arch == "Darwin":
frameworks += ['OpenCL']
@@ -29,4 +28,3 @@ for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transfor
cython_libs = envCython["LIBS"] + libs
commonmodel_lib = lenv.Library('commonmodel', common_src)
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)

View File

@@ -118,30 +118,14 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
# action (includes lateral planning now)
modelV2.action = action
# times at X_IDXS according to model plan
PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N
PLAN_T_IDXS[0] = 0.0
plan_x = net_output_data['plan'][0,:,Plan.POSITION][:,0].tolist()
for xidx in range(1, ModelConstants.IDX_N):
tidx = 0
# increment tidx until we find an element that's further away than the current xidx
while tidx < ModelConstants.IDX_N - 1 and plan_x[tidx+1] < ModelConstants.X_IDXS[xidx]:
tidx += 1
if tidx == ModelConstants.IDX_N - 1:
# if the Plan doesn't extend far enough, set plan_t to the max value (10s), then break
PLAN_T_IDXS[xidx] = ModelConstants.T_IDXS[ModelConstants.IDX_N - 1]
break
# interpolate to find `t` for the current xidx
current_x_val = plan_x[tidx]
next_x_val = plan_x[tidx+1]
p = (ModelConstants.X_IDXS[xidx] - current_x_val) / (next_x_val - current_x_val) if abs(next_x_val - current_x_val) > 1e-9 else float('nan')
PLAN_T_IDXS[xidx] = p * ModelConstants.T_IDXS[tidx+1] + (1 - p) * ModelConstants.T_IDXS[tidx]
# times at X_IDXS of edges and lines aren't used
LINE_T_IDXS: list[float] = []
# lane lines
modelV2.init('laneLines', 4)
for i in range(4):
lane_line = modelV2.laneLines[i]
fill_xyzt(lane_line, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['lane_lines'][0,i,:,0], net_output_data['lane_lines'][0,i,:,1])
fill_xyzt(lane_line, LINE_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['lane_lines'][0,i,:,0], net_output_data['lane_lines'][0,i,:,1])
modelV2.laneLineStds = net_output_data['lane_lines_stds'][0,:,0,0].tolist()
modelV2.laneLineProbs = net_output_data['lane_lines_prob'][0,1::2].tolist()
@@ -151,7 +135,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
modelV2.init('roadEdges', 2)
for i in range(2):
road_edge = modelV2.roadEdges[i]
fill_xyzt(road_edge, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['road_edges'][0,i,:,0], net_output_data['road_edges'][0,i,:,1])
fill_xyzt(road_edge, LINE_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['road_edges'][0,i,:,0], net_output_data['road_edges'][0,i,:,1])
modelV2.roadEdgeStds = net_output_data['road_edges_stds'][0,:,0,0].tolist()
# leads

View File

@@ -62,7 +62,7 @@ class ModelState:
self.MIN_LAT_CONTROL_SPEED = 0.3
buffer_length = 5 if self.model_runner.is_20hz else 2
self.frames = {'input_imgs': DrivingModelFrame(context, buffer_length), 'big_input_imgs': DrivingModelFrame(context, buffer_length)}
self.frames = {name: DrivingModelFrame(context, buffer_length) for name in self.model_runner.vision_input_names}
self.prev_desire = np.zeros(self.constants.DESIRE_LEN, dtype=np.float32)
# img buffers are managed in openCL transform code
@@ -86,7 +86,7 @@ class ModelState:
self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1,
self.numpy_inputs['desire'].shape[2])
def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
inputs['desire'][0] = 0
@@ -110,8 +110,7 @@ class ModelState:
if key in inputs and key not in ['desire']:
self.numpy_inputs[key][:] = inputs[key]
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.model_runner.vision_input_names}
# Prepare inputs using the model runner
self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs, self.frames)
@@ -315,6 +314,8 @@ def main(demo=False):
if prepare_only:
cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames")
bufs = {name: buf_extra if 'big' in name else buf_main for name in model.model_runner.vision_input_names}
transforms = {name: model_transform_extra if 'big' in name else model_transform_main for name in model.model_runner.vision_input_names}
inputs:dict[str, np.ndarray] = {
'desire': vec_desire,
'traffic_convention': traffic_convention,
@@ -324,7 +325,7 @@ def main(demo=False):
inputs['lateral_control_params'] = np.array([v_ego, steer_delay], dtype=np.float32)
mt1 = time.perf_counter()
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
model_output = model.run(bufs, transforms, inputs, prepare_only)
mt2 = time.perf_counter()
model_execution_time = mt2 - mt1

View File

@@ -19,7 +19,7 @@ from openpilot.system.hardware import PC
from openpilot.system.hardware.hw import Paths
from pathlib import Path
CURRENT_SELECTOR_VERSION = 5
CURRENT_SELECTOR_VERSION = 6
REQUIRED_MIN_SELECTOR_VERSION = 5
USE_ONNX = os.getenv('USE_ONNX', PC)

View File

@@ -12,8 +12,15 @@ CUSTOM_MODEL_PATH = Paths.model_root()
# Set QCOM environment variable for TICI devices, potentially enabling hardware acceleration
if TICI:
USBGPU = "USBGPU" in os.environ
if USBGPU:
os.environ['AMD'] = '1'
os.environ['AMD_IFACE'] = 'USB'
elif TICI:
os.environ['QCOM'] = '1'
else:
os.environ['LLVM'] = '1'
os.environ['JIT'] = '2' # TODO: This may cause issues
class ModelData:
@@ -132,6 +139,13 @@ class ModelRunner(ModularRunner):
return self._model_data.output_slices
raise ValueError("Model data is not available. Ensure the model is loaded correctly.")
@property
def vision_input_names(self) -> list[str]:
"""Returns the list of vision input names from the input shapes."""
if self._model_data:
return list(self._model_data.input_shapes.keys())
raise ValueError("Model data is not available. Ensure the model is loaded correctly.")
@abstractmethod
def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict:
"""

View File

@@ -54,6 +54,11 @@ class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTiny
self.input_to_dtype[name] = info[2] # dtype
self.input_to_device[name] = info[3] # device
@property
def vision_input_names(self) -> list[str]:
"""Returns the list of vision input names from the input shapes."""
return [name for name in self.input_shapes.keys() if 'img' in name]
def prepare_vision_inputs(self, imgs_cl: CLMemDict, frames: FrameDict):
"""Prepares vision (image) inputs as Tinygrad Tensors."""
for key in imgs_cl:
@@ -109,6 +114,11 @@ class TinygradSplitRunner(ModelRunner):
vision_output = self.vision_runner.run_model()
return {**policy_output, **vision_output} # Combine results
@property
def vision_input_names(self) -> list[str]:
"""Returns the list of vision input names from the vision runner."""
return list(self.vision_runner.vision_input_names)
@property
def input_shapes(self) -> ShapeDict:
"""Returns the combined input shapes from both vision and policy models."""