needs cleanup

2024-11-12 00:14:16 +00:00 · 2024-11-12 00:14:16 +00:00 · 8c817e305c
parent da952e9b64
commit 8c817e305c
6 changed files with 112 additions and 19 deletions
--- a/common/transformations/model.py
+++ b/common/transformations/model.py
@ -1,7 +1,7 @@
 import numpy as np

 from openpilot.common.transformations.orientation import rot_from_euler
-from openpilot.common.transformations.camera import get_view_frame_from_calib_frame, view_frame_from_device_frame
+from openpilot.common.transformations.camera import get_view_frame_from_calib_frame, view_frame_from_device_frame, _ar_ox_fisheye

 # segnet
 SEGNET_SIZE = (512, 384)
@ -39,6 +39,13 @@ sbigmodel_intrinsics = np.array([
  [0.0,  sbigmodel_fl,      0.5 * (256 + MEDMODEL_CY)],
  [0.0,  0.0,                                     1.0]])

+DM_INPUT_SIZE = (1440, 960)
+dmonitoringmodel_fl = _ar_ox_fisheye.focal_length
+dmonitoringmodel_intrinsics = np.array([
+  [dmonitoringmodel_fl,  0.0, DM_INPUT_SIZE[0]/2],
+  [0.0, dmonitoringmodel_fl, DM_INPUT_SIZE[1]/2 - (_ar_ox_fisheye.height - DM_INPUT_SIZE[1])/2],
+  [0.0,  0.0, 1.0]])
+
 bigmodel_frame_from_calib_frame = np.dot(bigmodel_intrinsics,
  get_view_frame_from_calib_frame(0, 0, 0, 0))

--- a/selfdrive/modeld/dmonitoringmodeld.py
+++ b/selfdrive/modeld/dmonitoringmodeld.py
@ -22,15 +22,16 @@ from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
 from openpilot.common.swaglog import cloudlog
 from openpilot.common.params import Params
 from openpilot.common.realtime import set_realtime_priority
-from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext #, cl_from_visionbuf
+from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
+from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE
+from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame #, cl_from_visionbuf
 from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid
-#from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
+from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
 from tinygrad.tensor import Tensor
-#from tinygrad.dtype import dtypes
+from tinygrad.dtype import dtypes

+MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE
 CALIB_LEN = 3
-MODEL_WIDTH = 1440
-MODEL_HEIGHT = 960
 FEATURE_LEN = 512
 OUTPUT_SIZE = 84 + FEATURE_LEN

@ -70,25 +71,29 @@ class ModelState:

  def __init__(self, cl_ctx):
    assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
-    self.numpy_inputs = {'calib': np.zeros((1, CALIB_LEN), dtype=np.float32),
-                         'input_img': np.zeros((1,MODEL_HEIGHT * MODEL_WIDTH), dtype=np.uint8)}
-    self.img = None
-
+    self.frame = MonitoringModelFrame(cl_ctx)
+    self.numpy_inputs = {
+      'calib': np.zeros((1, CALIB_LEN), dtype=np.float32),
+    }
+    self.img_inputs = {} # type: ignore

    with open(MODEL_PKL_PATH, "rb") as f:
      self.model_run = pickle.load(f)

-  def run(self, buf:VisionBuf, calib:np.ndarray) -> tuple[np.ndarray, float]:
+  def run(self, buf:VisionBuf, calib:np.ndarray, transform:np.ndarray) -> tuple[np.ndarray, float]:
    self.numpy_inputs['calib'][0,:] = calib

    t1 = time.perf_counter()
-    # TODO use opencl buffer directly to make tensor
-    v_offset = buf.height - MODEL_HEIGHT
-    h_offset = (buf.width - MODEL_WIDTH) // 2
-    buf_data = buf.data.reshape(-1, buf.stride)
-    self.numpy_inputs['input_img'][:] = buf_data[v_offset:v_offset+MODEL_HEIGHT, h_offset:h_offset+MODEL_WIDTH].reshape((1, -1))

-    tensor_inputs = {k: Tensor(v) for k,v in self.numpy_inputs.items()}
+    input_img_cl = self.frame.prepare(buf, transform.flatten())
+    if TICI:
+      # The imgs tensors are backed by opencl memory, only need init once
+      if 'input_img' not in self.img_inputs:
+        self.img_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8)
+    else:
+      self.img_inputs['input_img'] = Tensor(self.frame.buffer_from_cl(input_img_cl)).reshape((1, MODEL_WIDTH*MODEL_HEIGHT))
+
+    tensor_inputs = {**self.img_inputs, **{k: Tensor(v) for k,v in self.numpy_inputs.items()}}
    output = self.model_run(**tensor_inputs)['outputs'].numpy().flatten()

    t2 = time.perf_counter()
@ -146,18 +151,23 @@ def main():
  pm = PubMaster(["driverStateV2"])

  calib = np.zeros(CALIB_LEN, dtype=np.float32)
+  model_transform = None

  while True:
    buf = vipc_client.recv()
    if buf is None:
      continue

+    if model_transform is None:
+      from_intr = _os_fisheye.intrinsics if buf.width <= 1344 else _ar_ox_fisheye.intrinsics
+      model_transform = np.linalg.inv(np.dot(dmonitoringmodel_intrinsics, np.linalg.inv(from_intr))).astype(np.float32)
+
    sm.update(0)
    if sm.updated["liveCalibration"]:
      calib[:] = np.array(sm["liveCalibration"].rpyCalib)

    t1 = time.perf_counter()
-    model_output, gpu_execution_time = model.run(buf, calib)
+    model_output, gpu_execution_time = model.run(buf, calib, model_transform)
    t2 = time.perf_counter()

    pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time))
--- a/selfdrive/modeld/models/commonmodel.cc
+++ b/selfdrive/modeld/models/commonmodel.cc
@ -56,4 +56,40 @@ ModelFrame::~ModelFrame() {
  CL_CHECK(clReleaseMemObject(u_cl));
  CL_CHECK(clReleaseMemObject(y_cl));
  CL_CHECK(clReleaseCommandQueue(q));
-}
+}
+
+MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) {
+  input_frame = std::make_unique<uint8_t[]>(MODEL_FRAME_SIZE);
+  input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE, NULL, &err));
+
+  q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
+  y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT *sizeof(uint8_t), NULL, &err));
+  u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2) *sizeof(uint8_t), NULL, &err));
+  v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2) *sizeof(uint8_t), NULL, &err));
+
+  transform_init(&transform, context, device_id);
+}
+
+cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection) {
+  transform_queue(&this->transform, q,
+                  yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
+                  y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
+
+  // CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frame.get(), 0, nullptr, nullptr));
+  clFinish(q);
+  return &y_cl;
+}
+
+uint8_t* MonitoringModelFrame::buffer_from_cl(cl_mem *in_frame) {
+  CL_CHECK(clEnqueueReadBuffer(q, *in_frame, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frame.get(), 0, nullptr, nullptr));
+  clFinish(q);
+  return &input_frame[0];
+}
+
+MonitoringModelFrame::~MonitoringModelFrame() {
+  transform_destroy(&transform);
+  CL_CHECK(clReleaseMemObject(v_cl));
+  CL_CHECK(clReleaseMemObject(u_cl));
+  CL_CHECK(clReleaseMemObject(y_cl));
+  CL_CHECK(clReleaseCommandQueue(q));
+}
--- a/selfdrive/modeld/models/commonmodel.h
+++ b/selfdrive/modeld/models/commonmodel.h
@ -37,3 +37,21 @@ private:
  cl_buffer_region region;
  std::unique_ptr<uint8_t[]> input_frames;
 };
+
+class MonitoringModelFrame {
+public:
+  MonitoringModelFrame(cl_device_id device_id, cl_context context);
+  ~MonitoringModelFrame();
+  cl_mem* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform);
+  uint8_t* buffer_from_cl(cl_mem *in_frame);
+
+  const int MODEL_WIDTH = 1440;
+  const int MODEL_HEIGHT = 960;
+  const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
+
+private:
+  Transform transform;
+  cl_command_queue q;
+  cl_mem y_cl, u_cl, v_cl, input_frame_cl;
+  std::unique_ptr<uint8_t[]> input_frame;
+};
--- a/selfdrive/modeld/models/commonmodel.pxd
+++ b/selfdrive/modeld/models/commonmodel.pxd
@ -17,3 +17,9 @@ cdef extern from "selfdrive/modeld/models/commonmodel.h":
    ModelFrame(cl_device_id, cl_context)
    cl_mem * prepare(cl_mem, int, int, int, int, mat3)
    unsigned char * buffer_from_cl(cl_mem*);
+
+  cppclass MonitoringModelFrame:
+    int MODEL_FRAME_SIZE
+    MonitoringModelFrame(cl_device_id, cl_context)
+    cl_mem * prepare(cl_mem, int, int, int, int, mat3)
+    unsigned char * buffer_from_cl(cl_mem*)
--- a/selfdrive/modeld/models/commonmodel_pyx.pyx
+++ b/selfdrive/modeld/models/commonmodel_pyx.pyx
@ -10,6 +10,7 @@ from msgq.visionipc.visionipc cimport cl_mem
 from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
 from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
 from .commonmodel cimport mat3, ModelFrame as cppModelFrame
+from .commonmodel cimport mat3, ModelFrame as cppModelFrame, MonitoringModelFrame as cppMonitoringModelFrame


 cdef class CLContext(BaseCLContext):
@ -51,3 +52,18 @@ cdef class ModelFrame:
    cdef unsigned char * data2
    data2 = self.frame.buffer_from_cl(in_frames.mem)
    return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data2)
+
+cdef class MonitoringModelFrame(ModelFrame):
+  cdef cppMonitoringModelFrame * dmframe
+
+  def __cinit__(self, CLContext context):
+    self.dmframe = new cppMonitoringModelFrame(context.device_id, context.context)
+
+  def __dealloc__(self):
+    del self.dmframe
+
+  def prepare(self, VisionBuf buf, float[:] projection):
+    cdef mat3 cprojection
+    memcpy(cprojection.v, &projection[0], 9*sizeof(float))
+    cdef cl_mem * data = self.dmframe.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
+    return CLMem.create(data)