Model: Sync modeld with upstream arbitrary vision inputs (#1004)

* Prepare sunnypilot modeld refactor.

* This is needed to work with latest vision models from comma master.

* Add this for dev-c3-new
This commit is contained in:
James Vecellio-Grant
2025-06-19 08:24:20 -06:00
committed by GitHub
parent 6b9c63acbe
commit d62c3cdef9
6 changed files with 36 additions and 29 deletions

View File

@@ -13,7 +13,6 @@ common_src = [
"transforms/transform.cc",
]
# OpenCL is a framework on Mac
if arch == "Darwin":
frameworks += ['OpenCL']
@@ -29,4 +28,3 @@ for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transfor
cython_libs = envCython["LIBS"] + libs
commonmodel_lib = lenv.Library('commonmodel', common_src)
lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)

View File

@@ -118,30 +118,14 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
# action (includes lateral planning now)
modelV2.action = action
# times at X_IDXS according to model plan
PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N
PLAN_T_IDXS[0] = 0.0
plan_x = net_output_data['plan'][0,:,Plan.POSITION][:,0].tolist()
for xidx in range(1, ModelConstants.IDX_N):
tidx = 0
# increment tidx until we find an element that's further away than the current xidx
while tidx < ModelConstants.IDX_N - 1 and plan_x[tidx+1] < ModelConstants.X_IDXS[xidx]:
tidx += 1
if tidx == ModelConstants.IDX_N - 1:
# if the Plan doesn't extend far enough, set plan_t to the max value (10s), then break
PLAN_T_IDXS[xidx] = ModelConstants.T_IDXS[ModelConstants.IDX_N - 1]
break
# interpolate to find `t` for the current xidx
current_x_val = plan_x[tidx]
next_x_val = plan_x[tidx+1]
p = (ModelConstants.X_IDXS[xidx] - current_x_val) / (next_x_val - current_x_val) if abs(next_x_val - current_x_val) > 1e-9 else float('nan')
PLAN_T_IDXS[xidx] = p * ModelConstants.T_IDXS[tidx+1] + (1 - p) * ModelConstants.T_IDXS[tidx]
# times at X_IDXS of edges and lines aren't used
LINE_T_IDXS: list[float] = []
# lane lines
modelV2.init('laneLines', 4)
for i in range(4):
lane_line = modelV2.laneLines[i]
fill_xyzt(lane_line, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['lane_lines'][0,i,:,0], net_output_data['lane_lines'][0,i,:,1])
fill_xyzt(lane_line, LINE_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['lane_lines'][0,i,:,0], net_output_data['lane_lines'][0,i,:,1])
modelV2.laneLineStds = net_output_data['lane_lines_stds'][0,:,0,0].tolist()
modelV2.laneLineProbs = net_output_data['lane_lines_prob'][0,1::2].tolist()
@@ -151,7 +135,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
modelV2.init('roadEdges', 2)
for i in range(2):
road_edge = modelV2.roadEdges[i]
fill_xyzt(road_edge, PLAN_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['road_edges'][0,i,:,0], net_output_data['road_edges'][0,i,:,1])
fill_xyzt(road_edge, LINE_T_IDXS, np.array(ModelConstants.X_IDXS), net_output_data['road_edges'][0,i,:,0], net_output_data['road_edges'][0,i,:,1])
modelV2.roadEdgeStds = net_output_data['road_edges_stds'][0,:,0,0].tolist()
# leads

View File

@@ -62,7 +62,7 @@ class ModelState:
self.MIN_LAT_CONTROL_SPEED = 0.3
buffer_length = 5 if self.model_runner.is_20hz else 2
self.frames = {'input_imgs': DrivingModelFrame(context, buffer_length), 'big_input_imgs': DrivingModelFrame(context, buffer_length)}
self.frames = {name: DrivingModelFrame(context, buffer_length) for name in self.model_runner.vision_input_names}
self.prev_desire = np.zeros(self.constants.DESIRE_LEN, dtype=np.float32)
# img buffers are managed in openCL transform code
@@ -86,7 +86,7 @@ class ModelState:
self.desire_reshape_dims = (self.numpy_inputs['desire'].shape[0], self.numpy_inputs['desire'].shape[1], -1,
self.numpy_inputs['desire'].shape[2])
def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
# Model decides when action is completed, so desire input is just a pulse triggered on rising edge
inputs['desire'][0] = 0
@@ -110,8 +110,7 @@ class ModelState:
if key in inputs and key not in ['desire']:
self.numpy_inputs[key][:] = inputs[key]
imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}
imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.model_runner.vision_input_names}
# Prepare inputs using the model runner
self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs, self.frames)
@@ -315,6 +314,8 @@ def main(demo=False):
if prepare_only:
cloudlog.error(f"skipping model eval. Dropped {vipc_dropped_frames} frames")
bufs = {name: buf_extra if 'big' in name else buf_main for name in model.model_runner.vision_input_names}
transforms = {name: model_transform_extra if 'big' in name else model_transform_main for name in model.model_runner.vision_input_names}
inputs:dict[str, np.ndarray] = {
'desire': vec_desire,
'traffic_convention': traffic_convention,
@@ -324,7 +325,7 @@ def main(demo=False):
inputs['lateral_control_params'] = np.array([v_ego, steer_delay], dtype=np.float32)
mt1 = time.perf_counter()
model_output = model.run(buf_main, buf_extra, model_transform_main, model_transform_extra, inputs, prepare_only)
model_output = model.run(bufs, transforms, inputs, prepare_only)
mt2 = time.perf_counter()
model_execution_time = mt2 - mt1

View File

@@ -19,7 +19,7 @@ from openpilot.system.hardware import PC
from openpilot.system.hardware.hw import Paths
from pathlib import Path
CURRENT_SELECTOR_VERSION = 5
CURRENT_SELECTOR_VERSION = 6
REQUIRED_MIN_SELECTOR_VERSION = 5
USE_ONNX = os.getenv('USE_ONNX', PC)

View File

@@ -12,8 +12,15 @@ CUSTOM_MODEL_PATH = Paths.model_root()
# Set QCOM environment variable for TICI devices, potentially enabling hardware acceleration
if TICI:
USBGPU = "USBGPU" in os.environ
if USBGPU:
os.environ['AMD'] = '1'
os.environ['AMD_IFACE'] = 'USB'
elif TICI:
os.environ['QCOM'] = '1'
else:
os.environ['LLVM'] = '1'
os.environ['JIT'] = '2' # TODO: This may cause issues
class ModelData:
@@ -132,6 +139,13 @@ class ModelRunner(ModularRunner):
return self._model_data.output_slices
raise ValueError("Model data is not available. Ensure the model is loaded correctly.")
@property
def vision_input_names(self) -> list[str]:
"""Returns the list of vision input names from the input shapes."""
if self._model_data:
return list(self._model_data.input_shapes.keys())
raise ValueError("Model data is not available. Ensure the model is loaded correctly.")
@abstractmethod
def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict:
"""

View File

@@ -54,6 +54,11 @@ class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTiny
self.input_to_dtype[name] = info[2] # dtype
self.input_to_device[name] = info[3] # device
@property
def vision_input_names(self) -> list[str]:
"""Returns the list of vision input names from the input shapes."""
return [name for name in self.input_shapes.keys() if 'img' in name]
def prepare_vision_inputs(self, imgs_cl: CLMemDict, frames: FrameDict):
"""Prepares vision (image) inputs as Tinygrad Tensors."""
for key in imgs_cl:
@@ -109,6 +114,11 @@ class TinygradSplitRunner(ModelRunner):
vision_output = self.vision_runner.run_model()
return {**policy_output, **vision_output} # Combine results
@property
def vision_input_names(self) -> list[str]:
"""Returns the list of vision input names from the vision runner."""
return list(self.vision_runner.vision_input_names)
@property
def input_shapes(self) -> ShapeDict:
"""Returns the combined input shapes from both vision and policy models."""