diff --git a/.github/workflows/sunnypilot-build-model.yaml b/.github/workflows/sunnypilot-build-model.yaml index bc4ecfe04..293c0f2a0 100644 --- a/.github/workflows/sunnypilot-build-model.yaml +++ b/.github/workflows/sunnypilot-build-model.yaml @@ -173,7 +173,7 @@ jobs: echo "Compiling: $onnx_file -> $output_file" QCOM=1 python3 "${{ env.TINYGRAD_PATH }}/examples/openpilot/compile3.py" "$onnx_file" "$output_file" - QCOM=1 python3 "${{ env.MODELS_DIR }}/../get_model_metadata.py" "$onnx_file" || true + DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 python3 "${{ env.MODELS_DIR }}/../get_model_metadata.py" "$onnx_file" || true done - name: Prepare Output diff --git a/selfdrive/debug/uiview.py b/selfdrive/debug/uiview.py index ad3ccea03..eac1f8fbf 100755 --- a/selfdrive/debug/uiview.py +++ b/selfdrive/debug/uiview.py @@ -3,7 +3,7 @@ import time from cereal import car, log, messaging from openpilot.common.params import Params -from openpilot.system.manager.process_config import managed_processes, is_snpe_model, is_tinygrad_model, is_stock_model +from openpilot.system.manager.process_config import managed_processes, is_tinygrad_model, is_stock_model from openpilot.system.hardware import HARDWARE if __name__ == "__main__": @@ -11,8 +11,6 @@ if __name__ == "__main__": params = Params() params.put("CarParams", CP.to_bytes()) - if use_snpe_modeld := is_snpe_model(False, params, CP): - print("Using SNPE modeld") if use_tinygrad_modeld := is_tinygrad_model(False, params, CP): print("Using TinyGrad modeld") if use_stock_modeld := is_stock_model(False, params, CP): @@ -21,7 +19,7 @@ if __name__ == "__main__": HARDWARE.set_power_save(False) procs = ['camerad', 'ui', 'calibrationd', 'plannerd', 'dmonitoringmodeld', 'dmonitoringd'] - procs += ["modeld_snpe" if use_snpe_modeld else "modeld_tinygrad" if use_tinygrad_modeld else "modeld"] + procs += ["modeld_tinygrad" if use_tinygrad_modeld else "modeld"] for p in procs: managed_processes[p].start() diff --git a/sunnypilot/SConscript b/sunnypilot/SConscript index eb3698f9d..09ad39ab4 100644 --- a/sunnypilot/SConscript +++ b/sunnypilot/SConscript @@ -1,4 +1,3 @@ SConscript(['common/transformations/SConscript']) -SConscript(['modeld/SConscript']) SConscript(['modeld_v2/SConscript']) SConscript(['selfdrive/locationd/SConscript']) diff --git a/sunnypilot/modeld_v2/SConscript b/sunnypilot/modeld_v2/SConscript index 28d39a75f..ddf889c0c 100644 --- a/sunnypilot/modeld_v2/SConscript +++ b/sunnypilot/modeld_v2/SConscript @@ -1,34 +1,8 @@ import os import glob -Import('env', 'envCython', 'arch', 'cereal', 'messaging', 'common', 'visionipc') +Import('env', 'arch') lenv = env.Clone() -lenvCython = envCython.Clone() - -libs = [cereal, messaging, visionipc, common, 'capnp', 'kj', 'pthread'] -frameworks = [] - -common_src = [ - "models/commonmodel.cc", - "transforms/loadyuv.cc", - "transforms/transform.cc", -] - -# OpenCL is a framework on Mac -if arch == "Darwin": - frameworks += ['OpenCL'] -else: - libs += ['OpenCL'] - -# Set path definitions -for pathdef, fn in {'TRANSFORM': 'transforms/transform.cl', 'LOADYUV': 'transforms/loadyuv.cl'}.items(): - for xenv in (lenv, lenvCython): - xenv['CXXFLAGS'].append(f'-D{pathdef}_PATH=\\"{File(fn).abspath}\\"') - -# Compile cython -cython_libs = envCython["LIBS"] + libs -commonmodel_lib = lenv.Library('commonmodel', common_src) -lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks) tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x] # Get model metadata @@ -47,20 +21,39 @@ if PC: if outputs: lenv.Command(outputs, inputs, cmd) +tg_flags = { + 'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0', + 'Darwin': f'DEV=CPU THREADS=0 HOME={os.path.expanduser("~")}', +}.get(arch, 'DEV=CPU CPU_LLVM=1 THREADS=0') + +image_flag = { + 'larch64': 'IMAGE=2', +}.get(arch, 'IMAGE=0') + def tg_compile(flags, model_name): pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"' fn = File(f"models/{model_name}").abspath + out = fn + "_tinygrad.pkl" + return lenv.Command( - fn + "_tinygrad.pkl", + out, [fn + ".onnx"] + tinygrad_files, - f'{pythonpath_string} {flags} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl' + f'{pythonpath_string} {flags} {image_flag} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {out}' ) -# Compile small models +# Compile models for model_name in ['supercombo', 'driving_vision', 'driving_off_policy', 'driving_policy']: if File(f"models/{model_name}.onnx").exists(): - flags = { - 'larch64': 'DEV=QCOM FLOAT16=1 NOLOCALS=1 IMAGE=2 JIT_BATCH_SIZE=0', - 'Darwin': f'DEV=CPU HOME={os.path.expanduser("~")} IMAGE=0', # tinygrad calls brew which needs a $HOME in the env - }.get(arch, 'DEV=CPU CPU_LLVM=1 IMAGE=0') - tg_compile(flags, model_name) + tg_compile(tg_flags, model_name) + +script_files = [File("warp.py"), File(Dir("#selfdrive/modeld").File("compile_warp.py").abspath)] +pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + ':' + env.Dir("#").abspath + '"' +compile_warp_cmd = f'{pythonpath_string} {tg_flags} python3 -m sunnypilot.modeld_v2.warp' + +from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye +warp_targets = [] +for cam in [_ar_ox_fisheye, _os_fisheye]: + w, h = cam.width, cam.height + for bl in [2, 5]: + warp_targets.append(File(f"models/warp_{w}x{h}_b{bl}_tinygrad.pkl").abspath) +lenv.Command(warp_targets, tinygrad_files + script_files, compile_warp_cmd) diff --git a/sunnypilot/modeld_v2/get_model_metadata.py b/sunnypilot/modeld_v2/get_model_metadata.py index e0b5adc51..838b1e9f4 100755 --- a/sunnypilot/modeld_v2/get_model_metadata.py +++ b/sunnypilot/modeld_v2/get_model_metadata.py @@ -1,36 +1,51 @@ #!/usr/bin/env python3 import sys import pathlib -import onnx import codecs import pickle from typing import Any +from tinygrad.nn.onnx import OnnxPBParser -def get_name_and_shape(value_info:onnx.ValueInfoProto) -> tuple[str, tuple[int,...]]: - shape = tuple([int(dim.dim_value) for dim in value_info.type.tensor_type.shape.dim]) - name = value_info.name + +class MetadataOnnxPBParser(OnnxPBParser): + def _parse_ModelProto(self) -> dict: + obj: dict[str, Any] = {"graph": {"input": [], "output": []}, "metadata_props": []} + for fid, wire_type in self._parse_message(self.reader.len): + match fid: + case 7: + obj["graph"] = self._parse_GraphProto() + case 14: + obj["metadata_props"].append(self._parse_StringStringEntryProto()) + case _: + self.reader.skip_field(wire_type) + return obj + + +def get_name_and_shape(value_info: dict[str, Any]) -> tuple[str, tuple[int, ...]]: + shape = tuple(int(dim) if isinstance(dim, int) else 0 for dim in value_info["parsed_type"].shape) + name = value_info["name"] return name, shape -def get_metadata_value_by_name(model:onnx.ModelProto, name:str) -> str | Any: - for prop in model.metadata_props: - if prop.key == name: - return prop.value +def get_metadata_value_by_name(model: dict[str, Any], name: str) -> str | Any: + for prop in model["metadata_props"]: + if prop["key"] == name: + return prop["value"] return None if __name__ == "__main__": model_path = pathlib.Path(sys.argv[1]) - model = onnx.load(str(model_path)) + model = MetadataOnnxPBParser(model_path).parse() output_slices = get_metadata_value_by_name(model, 'output_slices') assert output_slices is not None, 'output_slices not found in metadata' metadata = { 'model_checkpoint': get_metadata_value_by_name(model, 'model_checkpoint'), 'output_slices': pickle.loads(codecs.decode(output_slices.encode(), "base64")), - 'input_shapes': dict([get_name_and_shape(x) for x in model.graph.input]), - 'output_shapes': dict([get_name_and_shape(x) for x in model.graph.output]) + 'input_shapes': dict(get_name_and_shape(x) for x in model["graph"]["input"]), + 'output_shapes': dict(get_name_and_shape(x) for x in model["graph"]["output"]), } metadata_path = model_path.parent / (model_path.stem + '_metadata.pkl') diff --git a/sunnypilot/modeld_v2/install_models_pc.py b/sunnypilot/modeld_v2/install_models_pc.py index a378d90b1..d203de348 100755 --- a/sunnypilot/modeld_v2/install_models_pc.py +++ b/sunnypilot/modeld_v2/install_models_pc.py @@ -3,41 +3,27 @@ import sys import shutil import pickle import codecs -import onnx from pathlib import Path from openpilot.system.hardware.hw import Paths - - -def get_name_and_shape(value_info): - shape = tuple([int(dim.dim_value) for dim in value_info.type.tensor_type.shape.dim]) - return value_info.name, shape - - -def get_metadata_value_by_name(model, name): - for prop in model.metadata_props: - if prop.key == name: - return prop.value - return None +from sunnypilot.modeld_v2.get_model_metadata import MetadataOnnxPBParser, get_name_and_shape, get_metadata_value_by_name def generate_metadata_pkl(model_path, output_path): try: - model = onnx.load(str(model_path)) + model = MetadataOnnxPBParser(model_path).parse() output_slices = get_metadata_value_by_name(model, 'output_slices') - - if output_slices: - metadata = { - 'model_checkpoint': get_metadata_value_by_name(model, 'model_checkpoint'), - 'output_slices': pickle.loads(codecs.decode(output_slices.encode(), "base64")), - 'input_shapes': dict([get_name_and_shape(x) for x in model.graph.input]), - 'output_shapes': dict([get_name_and_shape(x) for x in model.graph.output]) - } - with open(output_path, 'wb') as f: - pickle.dump(metadata, f) - return True - else: + if not output_slices: return False + metadata = { + 'model_checkpoint': get_metadata_value_by_name(model, 'model_checkpoint'), + 'output_slices': pickle.loads(codecs.decode(output_slices.encode(), "base64")), + 'input_shapes': dict(get_name_and_shape(x) for x in model["graph"]["input"]), + 'output_shapes': dict(get_name_and_shape(x) for x in model["graph"]["output"]), + } + with open(output_path, 'wb') as f: + pickle.dump(metadata, f) + return True except Exception: return False diff --git a/sunnypilot/modeld_v2/modeld.py b/sunnypilot/modeld_v2/modeld.py index e25078f52..c38ba40d4 100755 --- a/sunnypilot/modeld_v2/modeld.py +++ b/sunnypilot/modeld_v2/modeld.py @@ -26,7 +26,7 @@ from openpilot.selfdrive.controls.lib.drive_helpers import get_accel_from_plan, from openpilot.sunnypilot.modeld_v2.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState, get_curvature_from_output from openpilot.sunnypilot.modeld_v2.constants import Plan -from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import DrivingModelFrame, CLContext +from openpilot.sunnypilot.modeld_v2.warp import Warp from openpilot.sunnypilot.modeld_v2.meta_helper import load_meta_constants from openpilot.sunnypilot.modeld_v2.camera_offset_helper import CameraOffsetHelper @@ -49,12 +49,12 @@ class FrameMeta: class ModelState(ModelStateBase): - frames: dict[str, DrivingModelFrame] + frames: dict[str, Warp] inputs: dict[str, np.ndarray] prev_desire: np.ndarray # for tracking the rising edge of the pulse temporal_idxs: slice | np.ndarray - def __init__(self, context: CLContext): + def __init__(self): ModelStateBase.__init__(self) try: self.model_runner = get_model_runner() @@ -73,17 +73,16 @@ class ModelState(ModelStateBase): self.PLANPLUS_CONTROL: float = 1.0 buffer_length = 5 if self.model_runner.is_20hz else 2 - self.frames = {name: DrivingModelFrame(context, buffer_length) for name in self.model_runner.vision_input_names} + self.warp = Warp(buffer_length) self.prev_desire = np.zeros(self.constants.DESIRE_LEN, dtype=np.float32) - - # img buffers are managed in openCL transform code self.numpy_inputs = {} self.temporal_buffers = {} self.temporal_idxs_map = {} for key, shape in self.model_runner.input_shapes.items(): - if key not in self.frames: # Managed by opencl + if key not in self.model_runner.vision_input_names: # Policy inputs self.numpy_inputs[key] = np.zeros(shape, dtype=np.float32) + # Temporal input: shape is [batch, history, features] if len(shape) == 3 and shape[1] > 1: buffer_history_len = shape[1] * 4 if shape[1] < 99 else shape[1] # Allow for higher history buffers in the future @@ -129,10 +128,10 @@ class ModelState(ModelStateBase): if key in inputs and key not in [self.desire_key]: self.numpy_inputs[key][:] = inputs[key] - imgs_cl = {name: self.frames[name].prepare(bufs[name], transforms[name].flatten()) for name in self.model_runner.vision_input_names} - - # Prepare inputs using the model runner - self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs, self.frames) + imgs_tensors = self.warp.process(bufs, transforms) + for name, tensor in imgs_tensors.items(): + self.model_runner.inputs[name] = tensor + self.model_runner.prepare_inputs(self.numpy_inputs) if prepare_only: return None @@ -147,12 +146,11 @@ class ModelState(ModelStateBase): if "desired_curvature" in outputs: input_name_prev = None - if "prev_desired_curvs" in self.numpy_inputs.keys(): - input_name_prev = 'prev_desired_curvs' - elif "prev_desired_curv" in self.numpy_inputs.keys(): + if "prev_desired_curv" in self.numpy_inputs.keys(): input_name_prev = 'prev_desired_curv' if input_name_prev and input_name_prev in self.temporal_buffers: self.process_desired_curvature(outputs, input_name_prev) + return outputs def process_desired_curvature(self, outputs, input_name_prev): @@ -165,9 +163,8 @@ class ModelState(ModelStateBase): def get_action_from_model(self, model_output: dict[str, np.ndarray], prev_action: log.ModelDataV2.Action, lat_action_t: float, long_action_t: float, v_ego: float) -> log.ModelDataV2.Action: plan = model_output['plan'][0] - if 'planplus' in model_output: - recovery_power = self.PLANPLUS_CONTROL * (0.75 if v_ego > 20.0 else 1.0) - plan = plan + recovery_power * model_output['planplus'][0] + if 'planplus' in model_output and self.PLANPLUS_CONTROL != 1.0: + plan = plan + (self.PLANPLUS_CONTROL - 1.0) * model_output['planplus'][0] desired_accel, should_stop = get_accel_from_plan(plan[:, Plan.VELOCITY][:, 0], plan[:, Plan.ACCELERATION][:, 0], self.constants.T_IDXS, action_t=long_action_t) desired_accel = smooth_value(desired_accel, prev_action.desiredAcceleration, self.LONG_SMOOTH_SECONDS) @@ -190,10 +187,8 @@ def main(demo=False): setproctitle(PROCESS_NAME) config_realtime_process(7, 54) - cloudlog.warning("setting up CL context") - cl_context = CLContext() - cloudlog.warning("CL context ready; loading model") - model = ModelState(cl_context) + cloudlog.warning("loading model") + model = ModelState() cloudlog.warning("models loaded, modeld starting") # visionipc clients @@ -206,8 +201,8 @@ def main(demo=False): time.sleep(.1) vipc_client_main_stream = VisionStreamType.VISION_STREAM_WIDE_ROAD if main_wide_camera else VisionStreamType.VISION_STREAM_ROAD - vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True, cl_context) - vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False, cl_context) + vipc_client_main = VisionIpcClient("camerad", vipc_client_main_stream, True) + vipc_client_extra = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_WIDE_ROAD, False) cloudlog.warning(f"vision stream set up, main_wide_camera: {main_wide_camera}, use_extra_client: {use_extra_client}") while not vipc_client_main.connect(False): diff --git a/sunnypilot/modeld_v2/models/commonmodel.cc b/sunnypilot/modeld_v2/models/commonmodel.cc deleted file mode 100644 index 5cd3a84fc..000000000 --- a/sunnypilot/modeld_v2/models/commonmodel.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "sunnypilot/modeld_v2/models/commonmodel.h" - -#include -#include - -#include "common/clutil.h" - -DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context, uint8_t buffer_length) : ModelFrame(device_id, context), buffer_length(buffer_length) { - input_frames = std::make_unique(buf_size); - input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err)); - img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_length*frame_size_bytes, NULL, &err)); - region.origin = (buffer_length - 1) * frame_size_bytes; - region.size = frame_size_bytes; - last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err)); - // printf("Buffer length: %d, region origin: %lu, region size: %lu\n", buffer_length, region.origin, region.size); - - loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT); - init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT); -} - -cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { - run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection); - - for (int i = 0; i < (buffer_length - 1); i++) { - CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr)); - } - loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl); - - copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes); - copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes); - - // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready. - clFinish(q); - return &input_frames_cl; -} - -DrivingModelFrame::~DrivingModelFrame() { - deinit_transform(); - loadyuv_destroy(&loadyuv); - CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl)); - CL_CHECK(clReleaseMemObject(last_img_cl)); - CL_CHECK(clReleaseCommandQueue(q)); -} - - -MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) { - input_frames = std::make_unique(buf_size); - input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err)); - - init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT); -} - -cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { - run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection); - clFinish(q); - return &y_cl; -} - -MonitoringModelFrame::~MonitoringModelFrame() { - deinit_transform(); - CL_CHECK(clReleaseCommandQueue(q)); -} diff --git a/sunnypilot/modeld_v2/models/commonmodel.h b/sunnypilot/modeld_v2/models/commonmodel.h deleted file mode 100644 index 8203e064e..000000000 --- a/sunnypilot/modeld_v2/models/commonmodel.h +++ /dev/null @@ -1,97 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#ifdef __APPLE__ -#include -#else -#include -#endif - -#include "common/mat.h" -#include "sunnypilot/modeld_v2/transforms/loadyuv.h" -#include "sunnypilot/modeld_v2/transforms/transform.h" - -class ModelFrame { -public: - ModelFrame(cl_device_id device_id, cl_context context) { - q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err)); - } - virtual ~ModelFrame() {} - virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; } - uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) { - CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr)); - clFinish(q); - return &input_frames[0]; - } - - int MODEL_WIDTH; - int MODEL_HEIGHT; - int MODEL_FRAME_SIZE; - int buf_size; - -protected: - cl_mem y_cl, u_cl, v_cl; - Transform transform; - cl_command_queue q; - std::unique_ptr input_frames; - - void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) { - y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err)); - u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err)); - v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err)); - transform_init(&transform, context, device_id); - } - - void deinit_transform() { - transform_destroy(&transform); - CL_CHECK(clReleaseMemObject(v_cl)); - CL_CHECK(clReleaseMemObject(u_cl)); - CL_CHECK(clReleaseMemObject(y_cl)); - } - - void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { - transform_queue(&transform, q, - yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset, - y_cl, u_cl, v_cl, model_width, model_height, projection); - } -}; - -class DrivingModelFrame : public ModelFrame { -public: - DrivingModelFrame(cl_device_id device_id, cl_context context, uint8_t buffer_length); - ~DrivingModelFrame(); - cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection); - - const int MODEL_WIDTH = 512; - const int MODEL_HEIGHT = 256; - const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2; - const int buf_size = MODEL_FRAME_SIZE * 2; - const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t); - const uint8_t buffer_length; - -private: - LoadYUVState loadyuv; - cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl; - cl_buffer_region region; -}; - -class MonitoringModelFrame : public ModelFrame { -public: - MonitoringModelFrame(cl_device_id device_id, cl_context context); - ~MonitoringModelFrame(); - cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection); - - const int MODEL_WIDTH = 1440; - const int MODEL_HEIGHT = 960; - const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT; - const int buf_size = MODEL_FRAME_SIZE; - -private: - cl_mem input_frame_cl; -}; diff --git a/sunnypilot/modeld_v2/models/commonmodel.pxd b/sunnypilot/modeld_v2/models/commonmodel.pxd deleted file mode 100644 index 55023ac4b..000000000 --- a/sunnypilot/modeld_v2/models/commonmodel.pxd +++ /dev/null @@ -1,27 +0,0 @@ -# distutils: language = c++ - -from msgq.visionipc.visionipc cimport cl_device_id, cl_context, cl_mem - -cdef extern from "common/mat.h": - cdef struct mat3: - float v[9] - -cdef extern from "common/clutil.h": - cdef unsigned long CL_DEVICE_TYPE_DEFAULT - cl_device_id cl_get_device_id(unsigned long) - cl_context cl_create_context(cl_device_id) - void cl_release_context(cl_context) - -cdef extern from "sunnypilot/modeld_v2/models/commonmodel.h": - cppclass ModelFrame: - int buf_size - unsigned char * buffer_from_cl(cl_mem*, int); - cl_mem * prepare(cl_mem, int, int, int, int, mat3) - - cppclass DrivingModelFrame: - int buf_size - DrivingModelFrame(cl_device_id, cl_context, unsigned char) - - cppclass MonitoringModelFrame: - int buf_size - MonitoringModelFrame(cl_device_id, cl_context) diff --git a/sunnypilot/modeld_v2/models/commonmodel_pyx.pxd b/sunnypilot/modeld_v2/models/commonmodel_pyx.pxd deleted file mode 100644 index 0bb798625..000000000 --- a/sunnypilot/modeld_v2/models/commonmodel_pyx.pxd +++ /dev/null @@ -1,13 +0,0 @@ -# distutils: language = c++ - -from msgq.visionipc.visionipc cimport cl_mem -from msgq.visionipc.visionipc_pyx cimport CLContext as BaseCLContext - -cdef class CLContext(BaseCLContext): - pass - -cdef class CLMem: - cdef cl_mem * mem - - @staticmethod - cdef create(void*) diff --git a/sunnypilot/modeld_v2/models/commonmodel_pyx.pyx b/sunnypilot/modeld_v2/models/commonmodel_pyx.pyx deleted file mode 100644 index 78a891f03..000000000 --- a/sunnypilot/modeld_v2/models/commonmodel_pyx.pyx +++ /dev/null @@ -1,74 +0,0 @@ -# distutils: language = c++ -# cython: c_string_encoding=ascii, language_level=3 - -import numpy as np -cimport numpy as cnp -from libc.string cimport memcpy -from libc.stdint cimport uintptr_t, uint8_t - -from msgq.visionipc.visionipc cimport cl_mem -from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext -from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context, cl_release_context -from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame - - -cdef class CLContext(BaseCLContext): - def __cinit__(self): - self.device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT) - self.context = cl_create_context(self.device_id) - - def __dealloc__(self): - if self.context: - cl_release_context(self.context) - -cdef class CLMem: - @staticmethod - cdef create(void * cmem): - mem = CLMem() - mem.mem = cmem - return mem - - @property - def mem_address(self): - return (self.mem) - -def cl_from_visionbuf(VisionBuf buf): - return CLMem.create(&buf.buf.buf_cl) - - -cdef class ModelFrame: - cdef cppModelFrame * frame - cdef int buf_size - - def __dealloc__(self): - del self.frame - - def prepare(self, VisionBuf buf, float[:] projection): - cdef mat3 cprojection - memcpy(cprojection.v, &projection[0], 9*sizeof(float)) - cdef cl_mem * data - data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection) - return CLMem.create(data) - - def buffer_from_cl(self, CLMem in_frames): - cdef unsigned char * data2 - data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size) - return np.asarray( data2) - - -cdef class DrivingModelFrame(ModelFrame): - cdef cppDrivingModelFrame * _frame - - def __cinit__(self, CLContext context, int buffer_length=2): - self._frame = new cppDrivingModelFrame(context.device_id, context.context, buffer_length) - self.frame = (self._frame) - self.buf_size = self._frame.buf_size - -cdef class MonitoringModelFrame(ModelFrame): - cdef cppMonitoringModelFrame * _frame - - def __cinit__(self, CLContext context): - self._frame = new cppMonitoringModelFrame(context.device_id, context.context) - self.frame = (self._frame) - self.buf_size = self._frame.buf_size - diff --git a/sunnypilot/modeld_v2/parse_model_outputs_split.py b/sunnypilot/modeld_v2/parse_model_outputs_split.py index a099facd1..831649e3c 100644 --- a/sunnypilot/modeld_v2/parse_model_outputs_split.py +++ b/sunnypilot/modeld_v2/parse_model_outputs_split.py @@ -108,8 +108,8 @@ class Parser: plan_in_N, plan_out_N = (SplitModelConstants.PLAN_MHP_N, SplitModelConstants.PLAN_MHP_SELECTION) if plan_mhp else (0, 0) self.parse_mdn('plan', outs, in_N=plan_in_N, out_N=plan_out_N, out_shape=(SplitModelConstants.IDX_N, SplitModelConstants.PLAN_WIDTH)) - if 'planplus' in outs: - self.parse_mdn('planplus', outs, in_N=plan_in_N, out_N=plan_out_N, out_shape=(SplitModelConstants.IDX_N, SplitModelConstants.PLAN_WIDTH)) + if 'planplus' in outs: + self.parse_mdn('planplus', outs, in_N=0, out_N=0, out_shape=(SplitModelConstants.IDX_N, SplitModelConstants.PLAN_WIDTH)) def split_outputs(self, outs: dict[str, np.ndarray]) -> None: if 'desired_curvature' in outs: diff --git a/sunnypilot/modeld_v2/runners/ort_helpers.py b/sunnypilot/modeld_v2/runners/ort_helpers.py deleted file mode 100644 index 26afb0356..000000000 --- a/sunnypilot/modeld_v2/runners/ort_helpers.py +++ /dev/null @@ -1,36 +0,0 @@ -import onnx -import onnxruntime as ort -import numpy as np -import itertools - -ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8} - -def attributeproto_fp16_to_fp32(attr): - float32_list = np.frombuffer(attr.raw_data, dtype=np.float16) - attr.data_type = 1 - attr.raw_data = float32_list.astype(np.float32).tobytes() - -def convert_fp16_to_fp32(model): - for i in model.graph.initializer: - if i.data_type == 10: - attributeproto_fp16_to_fp32(i) - for i in itertools.chain(model.graph.input, model.graph.output): - if i.type.tensor_type.elem_type == 10: - i.type.tensor_type.elem_type = 1 - for i in model.graph.node: - if i.op_type == 'Cast' and i.attribute[0].i == 10: - i.attribute[0].i = 1 - for a in i.attribute: - if hasattr(a, 't'): - if a.t.data_type == 10: - attributeproto_fp16_to_fp32(a.t) - return model.SerializeToString() - - -def make_onnx_cpu_runner(model_path): - options = ort.SessionOptions() - options.intra_op_num_threads = 4 - options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL - options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - model_data = convert_fp16_to_fp32(onnx.load(model_path)) - return ort.InferenceSession(model_data, options, providers=['CPUExecutionProvider']) diff --git a/sunnypilot/modeld_v2/runners/tinygrad_helpers.py b/sunnypilot/modeld_v2/runners/tinygrad_helpers.py deleted file mode 100644 index 776381341..000000000 --- a/sunnypilot/modeld_v2/runners/tinygrad_helpers.py +++ /dev/null @@ -1,8 +0,0 @@ - -from tinygrad.tensor import Tensor -from tinygrad.helpers import to_mv - -def qcom_tensor_from_opencl_address(opencl_address, shape, dtype): - cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0] - rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw gpu pointer. - return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM') diff --git a/sunnypilot/modeld_v2/tests/test_buffer_logic_inspect.py b/sunnypilot/modeld_v2/tests/test_buffer_logic_inspect.py index f664db31b..15009c94d 100644 --- a/sunnypilot/modeld_v2/tests/test_buffer_logic_inspect.py +++ b/sunnypilot/modeld_v2/tests/test_buffer_logic_inspect.py @@ -53,7 +53,7 @@ class DummyModelRunner: self.is_20hz = False # Minimal prepare/run methods so ModelState can be run without actually running the model - def prepare_inputs(self, imgs_cl, numpy_inputs, frames): + def prepare_inputs(self, numpy_inputs): return None def run_model(self): @@ -105,7 +105,7 @@ def get_expected_indices(shape, constants, mode, key=None): @pytest.mark.parametrize("shapes,mode", SHAPE_MODE_PARAMS, indirect=["shapes"]) def test_buffer_shapes_and_indices(shapes, mode, apply_patches): - state = ModelState(None) + state = ModelState() constants = DummyModelRunner(shapes).constants for key in shapes: buf = state.temporal_buffers.get(key, None) @@ -236,7 +236,7 @@ def dynamic_buffer_update(state, key, new_val, mode): @pytest.mark.parametrize("shapes,mode", SHAPE_MODE_PARAMS, indirect=["shapes"]) @pytest.mark.parametrize("key", ["desire", "features_buffer", "prev_desired_curv"]) def test_buffer_update_equivalence(shapes, mode, key, apply_patches): - state = ModelState(None) + state = ModelState() if key == "desire": desire_keys = [k for k in shapes.keys() if k.startswith('desire')] if desire_keys: diff --git a/sunnypilot/modeld_v2/tests/test_warp.py b/sunnypilot/modeld_v2/tests/test_warp.py new file mode 100644 index 000000000..daf0dd528 --- /dev/null +++ b/sunnypilot/modeld_v2/tests/test_warp.py @@ -0,0 +1,102 @@ +import os +os.environ['DEV'] = 'CPU' +import pytest +import numpy as np +from openpilot.selfdrive.modeld.compile_warp import get_nv12_info, CAMERA_CONFIGS +from openpilot.sunnypilot.modeld_v2.warp import Warp, MODEL_W, MODEL_H + +VISION_NAME_PAIRS = [ # needed to account for supercombos input_imgs + ('img', 'big_img'), + ('input_imgs', 'big_input_imgs'), +] + + +class MockVisionBuf: + def __init__(self, w, h): + self.width = w + self.height = h + _, _, _, yuv_size = get_nv12_info(w, h) + self.data = np.zeros(yuv_size, dtype=np.uint8) + + +@pytest.mark.parametrize("buffer_length", [2, 5]) +def test_warp_initialization(buffer_length): + warp = Warp(buffer_length) + assert warp.buffer_length == buffer_length + assert warp.img_buffer_shape == (buffer_length * 6, MODEL_H // 2, MODEL_W // 2) + + +@pytest.mark.parametrize("buffer_length", [2, 5]) +@pytest.mark.parametrize("cam_w, cam_h", CAMERA_CONFIGS) +@pytest.mark.parametrize("road, wide", VISION_NAME_PAIRS) +def test_warp_process(buffer_length, cam_w, cam_h, road, wide): + warp = Warp(buffer_length) + mock_buf = MockVisionBuf(cam_w, cam_h) + transform = np.eye(3, dtype=np.float32).flatten() + bufs = {road: mock_buf, wide: mock_buf} + transforms = {road: transform, wide: transform} + + out = warp.process(bufs, transforms) + assert isinstance(out, dict) + assert road in out and wide in out + assert out[road].shape == (1, 12, MODEL_H // 2, MODEL_W // 2) + assert out[wide].shape == (1, 12, MODEL_H // 2, MODEL_W // 2) + + key = (cam_w, cam_h) + assert key in warp.jit_cache + + out2 = warp.process(bufs, transforms) + assert out2[road].shape == out[road].shape + + +@pytest.mark.parametrize("road, wide", VISION_NAME_PAIRS) +def test_warp_buffer_shift(road, wide): + warp = Warp(2) + cam_w, cam_h = CAMERA_CONFIGS[1] + transform = np.eye(3, dtype=np.float32).flatten() + + buf1 = MockVisionBuf(cam_w, cam_h) + buf1.data[0] = 255 + bufs1 = {road: buf1, wide: buf1} + transforms = {road: transform, wide: transform} + out1 = warp.process(bufs1, transforms) + road1 = out1[road].numpy().copy() + + buf2 = MockVisionBuf(cam_w, cam_h) + buf2.data[0] = 128 + bufs2 = {road: buf2, wide: buf2} + out2 = warp.process(bufs2, transforms) + assert not np.array_equal(road1, out2[road].numpy()) + + +@pytest.mark.parametrize("buffer_length", [2, 5]) +@pytest.mark.parametrize("road, wide", VISION_NAME_PAIRS) +def test_warp_buffer_accumulation(buffer_length, road, wide): + warp = Warp(buffer_length) + cam_w, cam_h = CAMERA_CONFIGS[0] + transform = np.eye(3, dtype=np.float32).flatten() + transforms = {road: transform, wide: transform} + outputs = [] + + for i in range(buffer_length + 1): + buf = MockVisionBuf(cam_w, cam_h) + buf.data[:] = i * 10 + out = warp.process({road: buf, wide: buf}, transforms) + outputs.append(out[road].numpy().copy()) + + assert warp.full_buffers['img'].shape == (buffer_length * 6, MODEL_H // 2, MODEL_W // 2) + for i in range(1, len(outputs)): + assert not np.array_equal(outputs[i - 1], outputs[i]) + + +def test_warp_different_cameras_same_instance(): + warp = Warp(2) + transform = np.eye(3, dtype=np.float32).flatten() + + buf1 = MockVisionBuf(*CAMERA_CONFIGS[0]) + warp.process({'img': buf1, 'big_img': buf1}, {'img': transform, 'big_img': transform}) + assert len(warp.jit_cache) == 1 + + buf2 = MockVisionBuf(*CAMERA_CONFIGS[1]) + warp.process({'img': buf2, 'big_img': buf2}, {'img': transform, 'big_img': transform}) + assert len(warp.jit_cache) == 2 diff --git a/sunnypilot/modeld_v2/transforms/loadyuv.cc b/sunnypilot/modeld_v2/transforms/loadyuv.cc deleted file mode 100644 index eb669a592..000000000 --- a/sunnypilot/modeld_v2/transforms/loadyuv.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include "sunnypilot/modeld_v2/transforms/loadyuv.h" - -#include -#include -#include - -void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height) { - memset(s, 0, sizeof(*s)); - - s->width = width; - s->height = height; - - char args[1024]; - snprintf(args, sizeof(args), - "-cl-fast-relaxed-math -cl-denorms-are-zero " - "-DTRANSFORMED_WIDTH=%d -DTRANSFORMED_HEIGHT=%d", - width, height); - cl_program prg = cl_program_from_file(ctx, device_id, LOADYUV_PATH, args); - - s->loadys_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loadys", &err)); - s->loaduv_krnl = CL_CHECK_ERR(clCreateKernel(prg, "loaduv", &err)); - s->copy_krnl = CL_CHECK_ERR(clCreateKernel(prg, "copy", &err)); - - // done with this - CL_CHECK(clReleaseProgram(prg)); -} - -void loadyuv_destroy(LoadYUVState* s) { - CL_CHECK(clReleaseKernel(s->loadys_krnl)); - CL_CHECK(clReleaseKernel(s->loaduv_krnl)); - CL_CHECK(clReleaseKernel(s->copy_krnl)); -} - -void loadyuv_queue(LoadYUVState* s, cl_command_queue q, - cl_mem y_cl, cl_mem u_cl, cl_mem v_cl, - cl_mem out_cl) { - cl_int global_out_off = 0; - - CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl)); - CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl)); - CL_CHECK(clSetKernelArg(s->loadys_krnl, 2, sizeof(cl_int), &global_out_off)); - - const size_t loadys_work_size = (s->width*s->height)/8; - CL_CHECK(clEnqueueNDRangeKernel(q, s->loadys_krnl, 1, NULL, - &loadys_work_size, NULL, 0, 0, NULL)); - - const size_t loaduv_work_size = ((s->width/2)*(s->height/2))/8; - global_out_off += (s->width*s->height); - - CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &u_cl)); - CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl)); - CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off)); - - CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL, - &loaduv_work_size, NULL, 0, 0, NULL)); - - global_out_off += (s->width/2)*(s->height/2); - - CL_CHECK(clSetKernelArg(s->loaduv_krnl, 0, sizeof(cl_mem), &v_cl)); - CL_CHECK(clSetKernelArg(s->loaduv_krnl, 1, sizeof(cl_mem), &out_cl)); - CL_CHECK(clSetKernelArg(s->loaduv_krnl, 2, sizeof(cl_int), &global_out_off)); - - CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL, - &loaduv_work_size, NULL, 0, 0, NULL)); -} - -void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst, - size_t src_offset, size_t dst_offset, size_t size) { - CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src)); - CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst)); - CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset)); - CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset)); - const size_t copy_work_size = size/8; - CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL, - ©_work_size, NULL, 0, 0, NULL)); -} \ No newline at end of file diff --git a/sunnypilot/modeld_v2/transforms/loadyuv.cl b/sunnypilot/modeld_v2/transforms/loadyuv.cl deleted file mode 100644 index 970187a6d..000000000 --- a/sunnypilot/modeld_v2/transforms/loadyuv.cl +++ /dev/null @@ -1,47 +0,0 @@ -#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2)) - -__kernel void loadys(__global uchar8 const * const Y, - __global uchar * out, - int out_offset) -{ - const int gid = get_global_id(0); - const int ois = gid * 8; - const int oy = ois / TRANSFORMED_WIDTH; - const int ox = ois % TRANSFORMED_WIDTH; - - const uchar8 ys = Y[gid]; - - // 02 - // 13 - - __global uchar* outy0; - __global uchar* outy1; - if ((oy & 1) == 0) { - outy0 = out + out_offset; //y0 - outy1 = out + out_offset + UV_SIZE*2; //y2 - } else { - outy0 = out + out_offset + UV_SIZE; //y1 - outy1 = out + out_offset + UV_SIZE*3; //y3 - } - - vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); - vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2); -} - -__kernel void loaduv(__global uchar8 const * const in, - __global uchar8 * out, - int out_offset) -{ - const int gid = get_global_id(0); - const uchar8 inv = in[gid]; - out[gid + out_offset / 8] = inv; -} - -__kernel void copy(__global uchar8 * in, - __global uchar8 * out, - int in_offset, - int out_offset) -{ - const int gid = get_global_id(0); - out[gid + out_offset / 8] = in[gid + in_offset / 8]; -} diff --git a/sunnypilot/modeld_v2/transforms/loadyuv.h b/sunnypilot/modeld_v2/transforms/loadyuv.h deleted file mode 100644 index 659059cd2..000000000 --- a/sunnypilot/modeld_v2/transforms/loadyuv.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include "common/clutil.h" - -typedef struct { - int width, height; - cl_kernel loadys_krnl, loaduv_krnl, copy_krnl; -} LoadYUVState; - -void loadyuv_init(LoadYUVState* s, cl_context ctx, cl_device_id device_id, int width, int height); - -void loadyuv_destroy(LoadYUVState* s); - -void loadyuv_queue(LoadYUVState* s, cl_command_queue q, - cl_mem y_cl, cl_mem u_cl, cl_mem v_cl, - cl_mem out_cl); - - -void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst, - size_t src_offset, size_t dst_offset, size_t size); \ No newline at end of file diff --git a/sunnypilot/modeld_v2/transforms/transform.cc b/sunnypilot/modeld_v2/transforms/transform.cc deleted file mode 100644 index adc9bcebf..000000000 --- a/sunnypilot/modeld_v2/transforms/transform.cc +++ /dev/null @@ -1,97 +0,0 @@ -#include "sunnypilot/modeld_v2/transforms/transform.h" - -#include -#include - -#include "common/clutil.h" - -void transform_init(Transform* s, cl_context ctx, cl_device_id device_id) { - memset(s, 0, sizeof(*s)); - - cl_program prg = cl_program_from_file(ctx, device_id, TRANSFORM_PATH, ""); - s->krnl = CL_CHECK_ERR(clCreateKernel(prg, "warpPerspective", &err)); - // done with this - CL_CHECK(clReleaseProgram(prg)); - - s->m_y_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err)); - s->m_uv_cl = CL_CHECK_ERR(clCreateBuffer(ctx, CL_MEM_READ_WRITE, 3*3*sizeof(float), NULL, &err)); -} - -void transform_destroy(Transform* s) { - CL_CHECK(clReleaseMemObject(s->m_y_cl)); - CL_CHECK(clReleaseMemObject(s->m_uv_cl)); - CL_CHECK(clReleaseKernel(s->krnl)); -} - -void transform_queue(Transform* s, - cl_command_queue q, - cl_mem in_yuv, int in_width, int in_height, int in_stride, int in_uv_offset, - cl_mem out_y, cl_mem out_u, cl_mem out_v, - int out_width, int out_height, - const mat3& projection) { - const int zero = 0; - - // sampled using pixel center origin - // (because that's how fastcv and opencv does it) - - mat3 projection_y = projection; - - // in and out uv is half the size of y. - mat3 projection_uv = transform_scale_buffer(projection, 0.5); - - CL_CHECK(clEnqueueWriteBuffer(q, s->m_y_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_y.v, 0, NULL, NULL)); - CL_CHECK(clEnqueueWriteBuffer(q, s->m_uv_cl, CL_TRUE, 0, 3*3*sizeof(float), (void*)projection_uv.v, 0, NULL, NULL)); - - const int in_y_width = in_width; - const int in_y_height = in_height; - const int in_y_px_stride = 1; - const int in_uv_width = in_width/2; - const int in_uv_height = in_height/2; - const int in_uv_px_stride = 2; - const int in_u_offset = in_uv_offset; - const int in_v_offset = in_uv_offset + 1; - - const int out_y_width = out_width; - const int out_y_height = out_height; - const int out_uv_width = out_width/2; - const int out_uv_height = out_height/2; - - CL_CHECK(clSetKernelArg(s->krnl, 0, sizeof(cl_mem), &in_yuv)); // src - CL_CHECK(clSetKernelArg(s->krnl, 1, sizeof(cl_int), &in_stride)); // src_row_stride - CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_y_px_stride)); // src_px_stride - CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &zero)); // src_offset - CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_y_height)); // src_rows - CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_y_width)); // src_cols - CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_y)); // dst - CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_y_width)); // dst_row_stride - CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset - CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_y_height)); // dst_rows - CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_y_width)); // dst_cols - CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_y_cl)); // M - - const size_t work_size_y[2] = {(size_t)out_y_width, (size_t)out_y_height}; - - CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, - (const size_t*)&work_size_y, NULL, 0, 0, NULL)); - - const size_t work_size_uv[2] = {(size_t)out_uv_width, (size_t)out_uv_height}; - - CL_CHECK(clSetKernelArg(s->krnl, 2, sizeof(cl_int), &in_uv_px_stride)); // src_px_stride - CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_u_offset)); // src_offset - CL_CHECK(clSetKernelArg(s->krnl, 4, sizeof(cl_int), &in_uv_height)); // src_rows - CL_CHECK(clSetKernelArg(s->krnl, 5, sizeof(cl_int), &in_uv_width)); // src_cols - CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_u)); // dst - CL_CHECK(clSetKernelArg(s->krnl, 7, sizeof(cl_int), &out_uv_width)); // dst_row_stride - CL_CHECK(clSetKernelArg(s->krnl, 8, sizeof(cl_int), &zero)); // dst_offset - CL_CHECK(clSetKernelArg(s->krnl, 9, sizeof(cl_int), &out_uv_height)); // dst_rows - CL_CHECK(clSetKernelArg(s->krnl, 10, sizeof(cl_int), &out_uv_width)); // dst_cols - CL_CHECK(clSetKernelArg(s->krnl, 11, sizeof(cl_mem), &s->m_uv_cl)); // M - - CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, - (const size_t*)&work_size_uv, NULL, 0, 0, NULL)); - CL_CHECK(clSetKernelArg(s->krnl, 3, sizeof(cl_int), &in_v_offset)); // src_ofset - CL_CHECK(clSetKernelArg(s->krnl, 6, sizeof(cl_mem), &out_v)); // dst - - CL_CHECK(clEnqueueNDRangeKernel(q, s->krnl, 2, NULL, - (const size_t*)&work_size_uv, NULL, 0, 0, NULL)); -} diff --git a/sunnypilot/modeld_v2/transforms/transform.cl b/sunnypilot/modeld_v2/transforms/transform.cl deleted file mode 100644 index 2ca25920c..000000000 --- a/sunnypilot/modeld_v2/transforms/transform.cl +++ /dev/null @@ -1,54 +0,0 @@ -#define INTER_BITS 5 -#define INTER_TAB_SIZE (1 << INTER_BITS) -#define INTER_SCALE 1.f / INTER_TAB_SIZE - -#define INTER_REMAP_COEF_BITS 15 -#define INTER_REMAP_COEF_SCALE (1 << INTER_REMAP_COEF_BITS) - -__kernel void warpPerspective(__global const uchar * src, - int src_row_stride, int src_px_stride, int src_offset, int src_rows, int src_cols, - __global uchar * dst, - int dst_row_stride, int dst_offset, int dst_rows, int dst_cols, - __constant float * M) -{ - int dx = get_global_id(0); - int dy = get_global_id(1); - - if (dx < dst_cols && dy < dst_rows) - { - float X0 = M[0] * dx + M[1] * dy + M[2]; - float Y0 = M[3] * dx + M[4] * dy + M[5]; - float W = M[6] * dx + M[7] * dy + M[8]; - W = W != 0.0f ? INTER_TAB_SIZE / W : 0.0f; - int X = rint(X0 * W), Y = rint(Y0 * W); - - int sx = convert_short_sat(X >> INTER_BITS); - int sy = convert_short_sat(Y >> INTER_BITS); - - short sx_clamp = clamp(sx, 0, src_cols - 1); - short sx_p1_clamp = clamp(sx + 1, 0, src_cols - 1); - short sy_clamp = clamp(sy, 0, src_rows - 1); - short sy_p1_clamp = clamp(sy + 1, 0, src_rows - 1); - int v0 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]); - int v1 = convert_int(src[mad24(sy_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]); - int v2 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_clamp*src_px_stride)]); - int v3 = convert_int(src[mad24(sy_p1_clamp, src_row_stride, src_offset + sx_p1_clamp*src_px_stride)]); - - short ay = (short)(Y & (INTER_TAB_SIZE - 1)); - short ax = (short)(X & (INTER_TAB_SIZE - 1)); - float taby = 1.f/INTER_TAB_SIZE*ay; - float tabx = 1.f/INTER_TAB_SIZE*ax; - - int dst_index = mad24(dy, dst_row_stride, dst_offset + dx); - - int itab0 = convert_short_sat_rte( (1.0f-taby)*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); - int itab1 = convert_short_sat_rte( (1.0f-taby)*tabx * INTER_REMAP_COEF_SCALE ); - int itab2 = convert_short_sat_rte( taby*(1.0f-tabx) * INTER_REMAP_COEF_SCALE ); - int itab3 = convert_short_sat_rte( taby*tabx * INTER_REMAP_COEF_SCALE ); - - int val = v0 * itab0 + v1 * itab1 + v2 * itab2 + v3 * itab3; - - uchar pix = convert_uchar_sat((val + (1 << (INTER_REMAP_COEF_BITS-1))) >> INTER_REMAP_COEF_BITS); - dst[dst_index] = pix; - } -} diff --git a/sunnypilot/modeld_v2/transforms/transform.h b/sunnypilot/modeld_v2/transforms/transform.h deleted file mode 100644 index 771a7054b..000000000 --- a/sunnypilot/modeld_v2/transforms/transform.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#ifdef __APPLE__ -#include -#else -#include -#endif - -#include "common/mat.h" - -typedef struct { - cl_kernel krnl; - cl_mem m_y_cl, m_uv_cl; -} Transform; - -void transform_init(Transform* s, cl_context ctx, cl_device_id device_id); - -void transform_destroy(Transform* transform); - -void transform_queue(Transform* s, cl_command_queue q, - cl_mem yuv, int in_width, int in_height, int in_stride, int in_uv_offset, - cl_mem out_y, cl_mem out_u, cl_mem out_v, - int out_width, int out_height, - const mat3& projection); diff --git a/sunnypilot/modeld_v2/warp.py b/sunnypilot/modeld_v2/warp.py new file mode 100644 index 000000000..829cbcca4 --- /dev/null +++ b/sunnypilot/modeld_v2/warp.py @@ -0,0 +1,137 @@ +import pickle +import time +import numpy as np +from pathlib import Path +from tinygrad.tensor import Tensor +from tinygrad.engine.jit import TinyJit +from tinygrad.device import Device + +from openpilot.system.camerad.cameras.nv12_info import get_nv12_info +from openpilot.selfdrive.modeld.compile_warp import ( + CAMERA_CONFIGS, MEDMODEL_INPUT_SIZE, make_frame_prepare, make_update_both_imgs, + warp_pkl_path, +) + +MODELS_DIR = Path(__file__).parent / 'models' +MODEL_W, MODEL_H = MEDMODEL_INPUT_SIZE +UPSTREAM_BUFFER_LENGTH = 5 + + +def v2_warp_pkl_path(cam_w, cam_h, buffer_length): + return MODELS_DIR / f'warp_{cam_w}x{cam_h}_b{buffer_length}_tinygrad.pkl' + + +def compile_v2_warp(cam_w, cam_h, buffer_length): + _, _, _, yuv_size = get_nv12_info(cam_w, cam_h) + img_buffer_shape = (buffer_length * 6, MODEL_H // 2, MODEL_W // 2) + + print(f"Compiling v2 warp for {cam_w}x{cam_h} buffer_length={buffer_length}...") + + frame_prepare = make_frame_prepare(cam_w, cam_h, MODEL_W, MODEL_H) + update_both_imgs = make_update_both_imgs(frame_prepare, MODEL_W, MODEL_H) + update_img_jit = TinyJit(update_both_imgs, prune=True) + + full_buffer = Tensor.zeros(img_buffer_shape, dtype='uint8').contiguous().realize() + big_full_buffer = Tensor.zeros(img_buffer_shape, dtype='uint8').contiguous().realize() + full_buffer_np = np.zeros(img_buffer_shape, dtype=np.uint8) + big_full_buffer_np = np.zeros(img_buffer_shape, dtype=np.uint8) + + for i in range(10): + new_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8) + img_inputs = [full_buffer, + Tensor.from_blob(new_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(), + Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')] + new_big_frame_np = (32 * np.random.randn(yuv_size).astype(np.float32) + 128).clip(0, 255).astype(np.uint8) + big_img_inputs = [big_full_buffer, + Tensor.from_blob(new_big_frame_np.ctypes.data, (yuv_size,), dtype='uint8').realize(), + Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')] + inputs = img_inputs + big_img_inputs + Device.default.synchronize() + + inputs_np = [x.numpy() for x in inputs] + inputs_np[0] = full_buffer_np + inputs_np[3] = big_full_buffer_np + + st = time.perf_counter() + out = update_img_jit(*inputs) + full_buffer = out[0].contiguous().realize().clone() + big_full_buffer = out[2].contiguous().realize().clone() + mt = time.perf_counter() + Device.default.synchronize() + et = time.perf_counter() + print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms") + + pkl_path = v2_warp_pkl_path(cam_w, cam_h, buffer_length) + with open(pkl_path, "wb") as f: + pickle.dump(update_img_jit, f) + print(f" Saved to {pkl_path}") + + jit = pickle.load(open(pkl_path, "rb")) + jit(*inputs) + + +class Warp: + def __init__(self, buffer_length=2): + self.buffer_length = buffer_length + self.img_buffer_shape = (buffer_length * 6, MODEL_H // 2, MODEL_W // 2) + + self.jit_cache = {} + self.full_buffers = {k: Tensor.zeros(self.img_buffer_shape, dtype='uint8').contiguous().realize() for k in ['img', 'big_img']} + self._blob_cache: dict[int, Tensor] = {} + self._nv12_cache: dict[tuple[int, int], int] = {} + self.transforms_np = {k: np.zeros((3, 3), dtype=np.float32) for k in ['img', 'big_img']} + self.transforms = {k: Tensor(v, device='NPY').realize() for k, v in self.transforms_np.items()} + + def process(self, bufs, transforms): + if not bufs: + return {} + road = next(n for n in bufs if 'big' not in n) + wide = next(n for n in bufs if 'big' in n) + cam_w, cam_h = bufs[road].width, bufs[road].height + key = (cam_w, cam_h) + + if key not in self.jit_cache: + v2_pkl = v2_warp_pkl_path(cam_w, cam_h, self.buffer_length) + if v2_pkl.exists(): + with open(v2_pkl, 'rb') as f: + self.jit_cache[key] = pickle.load(f) + elif self.buffer_length == UPSTREAM_BUFFER_LENGTH: + upstream_pkl = warp_pkl_path(cam_w, cam_h) + if upstream_pkl.exists(): + with open(upstream_pkl, 'rb') as f: + self.jit_cache[key] = pickle.load(f) + if key not in self.jit_cache: + frame_prepare = make_frame_prepare(cam_w, cam_h, MODEL_W, MODEL_H) + update_both_imgs = make_update_both_imgs(frame_prepare, MODEL_W, MODEL_H) + self.jit_cache[key] = TinyJit(update_both_imgs, prune=True) + + if key not in self._nv12_cache: + self._nv12_cache[key] = get_nv12_info(cam_w, cam_h)[3] + yuv_size = self._nv12_cache[key] + + road_ptr = bufs[road].data.ctypes.data + wide_ptr = bufs[wide].data.ctypes.data + if road_ptr not in self._blob_cache: + self._blob_cache[road_ptr] = Tensor.from_blob(road_ptr, (yuv_size,), dtype='uint8') + if wide_ptr not in self._blob_cache: + self._blob_cache[wide_ptr] = Tensor.from_blob(wide_ptr, (yuv_size,), dtype='uint8') + road_blob = self._blob_cache[road_ptr] + wide_blob = self._blob_cache[wide_ptr] if wide_ptr != road_ptr else Tensor.from_blob(wide_ptr, (yuv_size,), dtype='uint8') + np.copyto(self.transforms_np['img'], transforms[road].reshape(3, 3)) + np.copyto(self.transforms_np['big_img'], transforms[wide].reshape(3, 3)) + + Device.default.synchronize() + res = self.jit_cache[key]( + self.full_buffers['img'], road_blob, self.transforms['img'], + self.full_buffers['big_img'], wide_blob, self.transforms['big_img'], + ) + self.full_buffers['img'], out_road = res[0].realize(), res[1].realize() + self.full_buffers['big_img'], out_wide = res[2].realize(), res[3].realize() + + return {road: out_road, wide: out_wide} + + +if __name__ == "__main__": + for cam_w, cam_h in CAMERA_CONFIGS: + for bl in [2, 5]: + compile_v2_warp(cam_w, cam_h, bl) diff --git a/sunnypilot/models/fetcher.py b/sunnypilot/models/fetcher.py index 0de749657..5990ee2e4 100644 --- a/sunnypilot/models/fetcher.py +++ b/sunnypilot/models/fetcher.py @@ -116,7 +116,7 @@ class ModelCache: class ModelFetcher: """Handles fetching and caching of model data from remote source""" - MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-docs/refs/heads/gh-pages/docs/driving_models_v14.json" + MODEL_URL = "https://raw.githubusercontent.com/sunnypilot/sunnypilot-docs/refs/heads/gh-pages/docs/driving_models_v15.json" def __init__(self, params: Params): self.params = params diff --git a/sunnypilot/models/helpers.py b/sunnypilot/models/helpers.py index 7fcf7f85e..98f7d9e38 100644 --- a/sunnypilot/models/helpers.py +++ b/sunnypilot/models/helpers.py @@ -13,16 +13,13 @@ import numpy as np from openpilot.common.params import Params from cereal import custom from openpilot.sunnypilot.modeld.constants import Meta, MetaTombRaider, MetaSimPose -from openpilot.sunnypilot.modeld.runners import ModelRunner -from openpilot.system.hardware import PC from openpilot.system.hardware.hw import Paths from pathlib import Path # see the README.md for more details on the model selector versioning -CURRENT_SELECTOR_VERSION = 14 +CURRENT_SELECTOR_VERSION = 15 REQUIRED_MIN_SELECTOR_VERSION = 14 -USE_ONNX = os.getenv('USE_ONNX', PC) CUSTOM_MODEL_PATH = Paths.model_root() METADATA_PATH = Path(__file__).parent / '../models/supercombo_metadata.pkl' @@ -122,16 +119,6 @@ def _get_model(): return None -def get_model_path(): - if USE_ONNX: - return {ModelRunner.ONNX: Path(__file__).parent / '../models/supercombo.onnx'} - - if model := _get_model(): - return {ModelRunner.THNEED: f"{CUSTOM_MODEL_PATH}/{model.artifact.fileName}"} - - return {ModelRunner.THNEED: Path(__file__).parent / '../models/supercombo.thneed'} - - def load_metadata(): metadata_path = METADATA_PATH diff --git a/sunnypilot/models/runners/constants.py b/sunnypilot/models/runners/constants.py index cbd1fdb37..acb316888 100644 --- a/sunnypilot/models/runners/constants.py +++ b/sunnypilot/models/runners/constants.py @@ -1,6 +1,5 @@ import os import numpy as np -from openpilot.sunnypilot.modeld_v2.models.commonmodel_pyx import DrivingModelFrame, CLMem from openpilot.system.hardware.hw import Paths from cereal import custom @@ -8,8 +7,6 @@ from cereal import custom NumpyDict = dict[str, np.ndarray] ShapeDict = dict[str, tuple[int, ...]] SliceDict = dict[str, slice] -CLMemDict = dict[str, CLMem] -FrameDict = dict[str, DrivingModelFrame] ModelType = custom.ModelManagerSP.Model.Type Model = custom.ModelManagerSP.Model diff --git a/sunnypilot/models/runners/model_runner.py b/sunnypilot/models/runners/model_runner.py index a49ff4d20..051fa349d 100644 --- a/sunnypilot/models/runners/model_runner.py +++ b/sunnypilot/models/runners/model_runner.py @@ -2,7 +2,7 @@ from abc import abstractmethod, ABC import numpy as np from openpilot.sunnypilot.models.helpers import get_active_bundle -from openpilot.sunnypilot.models.runners.constants import NumpyDict, ShapeDict, CLMemDict, FrameDict, Model, SliceDict, SEND_RAW_PRED +from openpilot.sunnypilot.models.runners.constants import NumpyDict, ShapeDict, Model, SliceDict, SEND_RAW_PRED from openpilot.system.hardware.hw import Paths import pickle @@ -133,13 +133,11 @@ class ModelRunner(ModularRunner): raise ValueError("Model data is not available. Ensure the model is loaded correctly.") @abstractmethod - def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict: + def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict: """ Abstract method to prepare inputs for model inference. - :param imgs_cl: Dictionary of OpenCL memory objects for image inputs. :param numpy_inputs: Dictionary of numpy arrays for non-image inputs. - :param frames: Dictionary of DrivingModelFrame objects for context. :return: Dictionary of prepared inputs ready for the model. """ raise NotImplementedError diff --git a/sunnypilot/models/runners/onnx/onnx_runner.py b/sunnypilot/models/runners/onnx/onnx_runner.py deleted file mode 100644 index 1ffead456..000000000 --- a/sunnypilot/models/runners/onnx/onnx_runner.py +++ /dev/null @@ -1,62 +0,0 @@ -import numpy as np - -from openpilot.sunnypilot.modeld_v2 import MODEL_PATH -from openpilot.sunnypilot.modeld_v2.runners.ort_helpers import make_onnx_cpu_runner, ORT_TYPES_TO_NP_TYPES -from openpilot.sunnypilot.models.runners.constants import ModelType, ShapeDict, CLMemDict, NumpyDict, FrameDict -from openpilot.sunnypilot.models.runners.model_runner import ModelRunner -from openpilot.sunnypilot.modeld_v2.constants import ModelConstants - - -class ONNXRunner(ModelRunner): - """ - A ModelRunner implementation for executing ONNX models using ONNX Runtime CPU. - - Handles loading the ONNX model, preparing inputs as numpy arrays, running - inference, and parsing outputs. This runner is typically used on non-TICI platforms. - """ - def __init__(self): - super().__init__() - # Initialize ONNX Runtime session for the model at MODEL_PATH - self.runner = make_onnx_cpu_runner(MODEL_PATH) - # Map expected input names to numpy dtypes - self.input_to_nptype = { - model_input.name: ORT_TYPES_TO_NP_TYPES[model_input.type] - for model_input in self.runner.get_inputs() - } - # For ONNX, _model_data isn't strictly necessary as shapes/types come from the runner - # However, we might still need output_slices if custom models define them. - # We assume supercombo type for potentially loading output_slices metadata if available. - self._model_data = self.models.get(ModelType.supercombo) - self._constants = ModelConstants # Constants for ONNX models, if needed - - @property - def input_shapes(self) -> ShapeDict: - """Returns the input shapes defined in the ONNX model.""" - # ONNX shapes are derived directly from the model definition via the runner - return {runner_input.name: runner_input.shape for runner_input in self.runner.get_inputs()} - - def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict: - """Prepares inputs for the ONNX model as numpy arrays.""" - self.inputs = numpy_inputs # Start with non-image numpy inputs - # Convert image inputs from OpenCL buffers to numpy arrays - for key in imgs_cl: - buffer = frames[key].buffer_from_cl(imgs_cl[key]) - reshaped_buffer = buffer.reshape(self.input_shapes[key]) - self.inputs[key] = reshaped_buffer.astype(dtype=self.input_to_nptype[key]) - return self.inputs - - def _parse_outputs(self, model_outputs: np.ndarray) -> NumpyDict: - """Parses the raw ONNX model outputs using the standard Parser.""" - # Use slicing if metadata is available, otherwise pass raw outputs - if self._model_data is None: - raise ValueError("Model data is not available. Ensure the model is loaded correctly.") - - outputs_to_parse = self._slice_outputs(model_outputs) if self._model_data else {'raw_pred': model_outputs} - result: NumpyDict = self.parser_method_dict[self._model_data.model.type.raw](outputs_to_parse) - return result - - def _run_model(self) -> NumpyDict: - """Runs the ONNX model inference and parses the outputs.""" - # Execute the ONNX Runtime session - outputs = self.runner.run(None, self.inputs)[0].flatten() - return self._parse_outputs(outputs) diff --git a/sunnypilot/models/runners/tinygrad/tinygrad_runner.py b/sunnypilot/models/runners/tinygrad/tinygrad_runner.py index 2df1c65e0..9033c892e 100644 --- a/sunnypilot/models/runners/tinygrad/tinygrad_runner.py +++ b/sunnypilot/models/runners/tinygrad/tinygrad_runner.py @@ -1,11 +1,9 @@ import pickle import numpy as np -from openpilot.sunnypilot.modeld_v2.runners.tinygrad_helpers import qcom_tensor_from_opencl_address -from openpilot.sunnypilot.models.runners.constants import CLMemDict, FrameDict, NumpyDict, ModelType, ShapeDict, CUSTOM_MODEL_PATH, SliceDict +from openpilot.sunnypilot.models.runners.constants import NumpyDict, ModelType, ShapeDict, CUSTOM_MODEL_PATH, SliceDict from openpilot.sunnypilot.models.runners.model_runner import ModelRunner from openpilot.sunnypilot.models.runners.tinygrad.model_types import PolicyTinygrad, VisionTinygrad, SupercomboTinygrad, OffPolicyTinygrad -from openpilot.system.hardware import TICI from openpilot.sunnypilot.models.split_model_constants import SplitModelConstants from openpilot.sunnypilot.modeld_v2.constants import ModelConstants @@ -54,37 +52,31 @@ class TinygradRunner(ModelRunner, SupercomboTinygrad, PolicyTinygrad, VisionTiny info = self.model_run.captured.expected_input_info[idx] self.input_to_dtype[name] = info[2] # dtype self.input_to_device[name] = info[3] # device + self._policy_cached = False @property def vision_input_names(self) -> list[str]: """Returns the list of vision input names from the input shapes.""" return [name for name in self.input_shapes.keys() if 'img' in name] - def prepare_vision_inputs(self, imgs_cl: CLMemDict, frames: FrameDict): - """Prepares vision (image) inputs as Tinygrad Tensors.""" - for key in imgs_cl: - if TICI and key not in self.inputs: - # On TICI, directly use OpenCL memory address for efficiency via QCOM extensions - self.inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=self.input_to_dtype[key]) - elif not TICI: - # On other platforms, copy data from CL buffer to a numpy array first - shape = frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key]) - self.inputs[key] = Tensor(shape, device=self.input_to_device[key], dtype=self.input_to_dtype[key]).realize() def prepare_policy_inputs(self, numpy_inputs: NumpyDict): - """Prepares non-image (policy) inputs as Tinygrad Tensors.""" - for key, value in numpy_inputs.items(): - self.inputs[key] = Tensor(value, device=self.input_to_device[key], dtype=self.input_to_dtype[key]).realize() + if not self._policy_cached: + for key, value in numpy_inputs.items(): + self.inputs[key] = Tensor(value, device='NPY').realize() + self._policy_cached = True - def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict: + def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict: """Prepares all vision and policy inputs for the model.""" - self.prepare_vision_inputs(imgs_cl, frames) self.prepare_policy_inputs(numpy_inputs) + for key in self.vision_input_names: + if key in self.inputs: + self.inputs[key] = self.inputs[key].cast(self.input_to_dtype[key]) return self.inputs def _run_model(self) -> NumpyDict: """Runs the Tinygrad model inference and parses the outputs.""" - outputs = self.model_run(**self.inputs).numpy().flatten() + outputs = self.model_run(**self.inputs).contiguous().realize().uop.base.buffer.numpy().flatten() return self._parse_outputs(outputs) def _parse_outputs(self, model_outputs: np.ndarray) -> NumpyDict: @@ -120,6 +112,9 @@ class TinygradSplitRunner(ModelRunner): off_policy_output = self.off_policy_runner.run_model() outputs.update(off_policy_output) + if 'planplus' in outputs and 'plan' in outputs: + outputs['plan'] = outputs['plan'] + outputs['planplus'] + return outputs @property @@ -143,17 +138,18 @@ class TinygradSplitRunner(ModelRunner): slices.update(self.off_policy_runner.output_slices) return slices - def prepare_inputs(self, imgs_cl: CLMemDict, numpy_inputs: NumpyDict, frames: FrameDict) -> dict: + def prepare_inputs(self, numpy_inputs: NumpyDict) -> dict: """Prepares inputs for both vision and policy models.""" # Policy inputs only depend on numpy_inputs self.policy_runner.prepare_policy_inputs(numpy_inputs) - # Vision inputs depend on imgs_cl and frames - self.vision_runner.prepare_vision_inputs(imgs_cl, frames) + + for key in self.vision_input_names: + if key in self.inputs: + self.vision_runner.inputs[key] = self.inputs[key].cast(self.vision_runner.input_to_dtype[key]) + inputs = {**self.policy_runner.inputs, **self.vision_runner.inputs} if self.off_policy_runner: self.off_policy_runner.prepare_policy_inputs(numpy_inputs) inputs.update(self.off_policy_runner.inputs) - - # Return combined inputs (though they are stored within respective runners) return inputs diff --git a/system/manager/process_config.py b/system/manager/process_config.py index 793fbc07f..2e43dfada 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -80,10 +80,6 @@ def use_sunnylink_uploader_shim(started, params, CP: car.CarParams) -> bool: """Shim for use_sunnylink_uploader to match the process manager signature.""" return use_sunnylink_uploader(params) -def is_snpe_model(started, params, CP: car.CarParams) -> bool: - """Check if the active model runner is SNPE.""" - return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.snpe) - def is_tinygrad_model(started, params, CP: car.CarParams) -> bool: """Check if the active model runner is SNPE.""" return bool(get_active_model_runner(params, not started) == custom.ModelManagerSP.Runner.tinygrad) @@ -170,7 +166,6 @@ procs = [ procs += [ # Models PythonProcess("models_manager", "sunnypilot.models.manager", only_offroad), - NativeProcess("modeld_snpe", "sunnypilot/modeld", ["./modeld"], and_(only_onroad, is_snpe_model)), NativeProcess("modeld_tinygrad", "sunnypilot/modeld_v2", ["./modeld"], and_(only_onroad, is_tinygrad_model)), # Backup