modeld_v2: refactor abstract class to support off-policy models (#1672)

* modeld_v2: refactor abstract class to support off-policy models. * whoops * bump
2026-02-18 10:43:55 +08:00 · 2026-02-05 05:35:57 -08:00
parent 5c12a7cfc3
commit 4d65c52e6d
7 changed files with 50 additions and 11 deletions
--- a/cereal/custom.capnp
+++ b/cereal/custom.capnp
@@ -153,6 +153,7 @@ struct ModelManagerSP @0xaedffd8f31e7b55d {
      navigation @1;
      vision @2;
      policy @3;
+      offPolicy @4;
    }
  }

--- a/release/ci/model_generator.py
+++ b/release/ci/model_generator.py
@@ -68,8 +68,10 @@ def generate_metadata(model_path: Path, output_dir: Path, short_name: str):
    metadata_file = metadata_file.rename(output_path / f"{base}_{short_name.lower()}_metadata.pkl")

  # Build the metadata structure
+  model_type = "offPolicy" if "off_policy" in base else base.split("_")[-1]
+
  model_metadata = {
-    "type": base.split("_")[-1] if "dmonitoring" not in base else "dmonitoring",
+    "type": model_type,
    "artifact": {
      "file_name": tinygrad_file.name,
      "download_uri": {
--- a/sunnypilot/modeld_v2/SConscript
+++ b/sunnypilot/modeld_v2/SConscript
@@ -39,7 +39,7 @@ if PC:
  model_dir = Dir("models").abspath
  cmd = f'python3 {Dir("#sunnypilot/modeld_v2").abspath}/install_models_pc.py {model_dir}'

-  for model_name in ['supercombo', 'driving_vision', 'driving_policy']:
+  for model_name in ['supercombo', 'driving_vision', 'driving_off_policy', 'driving_policy']:
    if File(f"models/{model_name}.onnx").exists():
        inputs.append(File(f"models/{model_name}.onnx"))
        inputs.append(File(f"models/{model_name}_tinygrad.pkl"))
@@ -57,7 +57,7 @@ def tg_compile(flags, model_name):
  )

 # Compile small models
-for model_name in ['supercombo', 'driving_vision', 'driving_policy']:
+for model_name in ['supercombo', 'driving_vision', 'driving_off_policy', 'driving_policy']:
  if File(f"models/{model_name}.onnx").exists():
    flags = {
      'larch64': 'DEV=QCOM',
--- a/sunnypilot/modeld_v2/install_models_pc.py
+++ b/sunnypilot/modeld_v2/install_models_pc.py
@@ -44,7 +44,7 @@ def generate_metadata_pkl(model_path, output_path):

 def install_models(model_dir):
  model_dir = Path(model_dir)
-  models = ["driving_policy", "driving_vision"]
+  models = ["driving_off_policy", "driving_policy", "driving_vision"]
  found_models = []

  for model in models:
--- a/sunnypilot/models/fetcher.py
+++ b/sunnypilot/models/fetcher.py
@@ -116,7 +116,7 @@ class ModelCache:

 class ModelFetcher:
  """Handles fetching and caching of model data from remote source"""
-  MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-docs/refs/heads/gh-pages/docs/driving_models_v10.json"
+  MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-docs/refs/heads/gh-pages/docs/driving_models_v11.json"

  def __init__(self, params: Params):
    self.params = params
--- a/sunnypilot/models/runners/tinygrad/model_types.py
+++ b/sunnypilot/models/runners/tinygrad/model_types.py
@@ -13,6 +13,22 @@ SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
 CUSTOM_MODEL_PATH = Paths.model_root()


+class OffPolicyTinygrad(ModularRunner, ABC):
+  """
+  Abstract ModularRunner mixin that adds parsing of off-policy model outputs.
+
+  Registers a SplitParser-backed handler under ModelType.offPolicy; outputs go through the parser's policy path.
+  """
+  def __init__(self):
+    self._off_policy_parser = SplitParser()
+    self.parser_method_dict[ModelType.offPolicy] = self._parse_off_policy_outputs  # parser_method_dict is not created here; presumably provided by a base class - confirm
+
+  def _parse_off_policy_outputs(self, model_outputs: np.ndarray) -> NumpyDict:
+    """Slices the raw off-policy outputs, then parses them with SplitParser.parse_policy_outputs."""
+    result: NumpyDict = self._off_policy_parser.parse_policy_outputs(self._slice_outputs(model_outputs))
+    return result
+
+
 class PolicyTinygrad(ModularRunner, ABC):
  """
  A TinygradRunner specialized for policy-only models.
--- a/sunnypilot/models/runners/tinygrad/tinygrad_runner.py
+++ b/sunnypilot/models/runners/tinygrad/tinygrad_runner.py
@@ -4,7 +4,7 @@ import numpy as np
 from openpilot.sunnypilot.modeld_v2.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
 from openpilot.sunnypilot.models.runners.constants import CLMemDict, FrameDict, NumpyDict, ModelType, ShapeDict, CUSTOM_MODEL_PATH, SliceDict
 from openpilot.sunnypilot.models.runners.model_runner import ModelRunner
-from openpilot.sunnypilot.models.runners.tinygrad.model_types import PolicyTinygrad, VisionTinygrad, SupercomboTinygrad
+from openpilot.sunnypilot.models.runners.tinygrad.model_types import PolicyTinygrad, VisionTinygrad, SupercomboTinygrad, OffPolicyTinygrad
 from openpilot.system.hardware import TICI
 from openpilot.sunnypilot.models.split_model_constants import SplitModelConstants
 from openpilot.sunnypilot.modeld_v2.constants import ModelConstants
@@ -12,7 +12,7 @@ from openpilot.sunnypilot.modeld_v2.constants import ModelConstants
 from tinygrad.tensor import Tensor


-class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTinygrad):
+class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTinygrad, OffPolicyTinygrad):
  """
  A ModelRunner implementation for executing Tinygrad models.

@@ -27,6 +27,7 @@ class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTiny
    SupercomboTinygrad.__init__(self)
    PolicyTinygrad.__init__(self)
    VisionTinygrad.__init__(self)
+    OffPolicyTinygrad.__init__(self)
    self._constants = ModelConstants
    self._model_data = self.models.get(model_type)
    if not self._model_data or not self._model_data.model:
@@ -106,13 +107,20 @@ class TinygradSplitRunner(ModelRunner):
    self.is_20hz_3d = True
    self.vision_runner = TinygradRunner(ModelType.vision)
    self.policy_runner = TinygradRunner(ModelType.policy)
+    self.off_policy_runner = TinygradRunner(ModelType.offPolicy) if self.models.get(ModelType.offPolicy) else None
    self._constants = SplitModelConstants

  def _run_model(self) -> NumpyDict:
    """Runs both vision and policy models and merges their parsed outputs."""
    policy_output = self.policy_runner.run_model()
    vision_output = self.vision_runner.run_model()
-    return {**policy_output, **vision_output} # Combine results
+    outputs = {**policy_output, **vision_output}
+
+    if self.off_policy_runner:
+      off_policy_output = self.off_policy_runner.run_model()
+      outputs.update(off_policy_output)
+
+    return outputs

  @property
  def vision_input_names(self) -> list[str]:
@@ -122,12 +130,18 @@ class TinygradSplitRunner(ModelRunner):
  @property
  def input_shapes(self) -> ShapeDict:
    """Returns the combined input shapes from both vision and policy models."""
-    return {**self.policy_runner.input_shapes, **self.vision_runner.input_shapes}
+    shapes = {**self.policy_runner.input_shapes, **self.vision_runner.input_shapes}
+    if self.off_policy_runner:
+      shapes.update(self.off_policy_runner.input_shapes)
+    return shapes

  @property
  def output_slices(self) -> SliceDict:
    """Returns the combined output slices from both vision and policy models."""
-    return {**self.policy_runner.output_slices, **self.vision_runner.output_slices}
+    slices = {**self.policy_runner.output_slices, **self.vision_runner.output_slices}
+    if self.off_policy_runner:
+      slices.update(self.off_policy_runner.output_slices)
+    return slices

  def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict:
    """Prepares inputs for both vision and policy models."""
@@ -135,5 +149,11 @@ class TinygradSplitRunner(ModelRunner):
    self.policy_runner.prepare_policy_inputs(numpy_inputs)
    # Vision inputs depend on imgs_cl and frames
    self.vision_runner.prepare_vision_inputs(imgs_cl, frames)
+    inputs = {**self.policy_runner.inputs, **self.vision_runner.inputs}
+
+    if self.off_policy_runner:
+      self.off_policy_runner.prepare_policy_inputs(numpy_inputs)
+      inputs.update(self.off_policy_runner.inputs)
+
    # Return combined inputs (though they are stored within respective runners)
-    return {**self.policy_runner.inputs, **self.vision_runner.inputs}
+    return inputs