Files
sunnypilot/selfdrive/modeld/models/commonmodel.h
Harald Schäfer 7b5a4fbb03 Tinygrad runner (#34171)
* squash

* bump tg

* bump tg

* debump tinygrad

* bump tinygrad

* bump tg

* Skip init iteration

* fixes

* cleanups

* skip first test sample

* typos

* linter unhappy

* update cpu usage

* OPENCL just zeros for now

* imports

* Try printing

* Runs again, but slower

* unused import

* Allow more buffer with tg and all on gpu

* bump tinygrad

* seems ok

* stricter timings for driving looser for dm

* try llvm

* check nvidia

* More timeout for now

* make test pass

* Revert "try llvm"

This reverts commit ef136e478320101fea262bae3579e558da991902.

* small fixes

* whitespace

* revert test timeout

* No model runners

* Always CPU always fast

* No onnx runtime GPU

* more cores

* cleanup

* Is this faster

* Is this faster

* at least runs

* FP32 is faster than 16

* fix deps

* whitespace

* comment

---------

Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
2024-12-11 23:15:20 -08:00

40 lines
1.0 KiB
C++

#pragma once
#include <cfloat>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "common/mat.h"
#include "selfdrive/modeld/transforms/loadyuv.h"
#include "selfdrive/modeld/transforms/transform.h"
// Holds the OpenCL state used to turn a YUV image (`yuv_cl`) into model-sized
// input frames.
// NOTE(review): only declarations are visible in this header; the behaviour of
// the member functions is defined elsewhere and should be confirmed there.
class ModelFrame {
public:
  // Model input geometry. These are compile-time constants, so they are
  // static constexpr: no per-instance storage, and usable in constant
  // expressions (array bounds, static_assert). Access through an instance
  // (`frame.buf_size`, `frame->buf_size`) keeps working.
  static constexpr int MODEL_WIDTH = 512;
  static constexpr int MODEL_HEIGHT = 256;
  // Elements per model frame: width * height * 3 / 2.
  // NOTE(review): the 3/2 factor matches a 4:2:0 YUV layout — confirm.
  static constexpr int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
  // Two model frames' worth of elements.
  static constexpr int buf_size = MODEL_FRAME_SIZE * 2;
  // Size of a single model frame in bytes (elements are uint8_t).
  static constexpr size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);

  // @param device_id  OpenCL device handle.
  // @param context    OpenCL context handle.
  ModelFrame(cl_device_id device_id, cl_context context);
  ~ModelFrame();

  // The class declares a destructor and stores raw OpenCL handles, so copying
  // is disabled explicitly. (It was already implicitly non-copyable because of
  // the std::unique_ptr member; this only makes the intent visible.)
  ModelFrame(const ModelFrame&) = delete;
  ModelFrame& operator=(const ModelFrame&) = delete;

  // @param yuv_cl           Source image buffer.
  // @param width            Source width.
  // @param height           Source height.
  // @param frame_stride     Source row stride.
  // @param frame_uv_offset  Offset of the UV data within `yuv_cl`.
  // @param transform        3x3 matrix applied to the source image.
  // @return Pointer to a cl_mem; presumably one owned by this object
  //         (NOTE(review): confirm ownership/lifetime in the definition).
  cl_mem* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform);

  // @param in_frames  Pointer to an OpenCL buffer handle.
  // @return Pointer to host bytes; presumably the `input_frames` storage
  //         (NOTE(review): confirm in the definition).
  uint8_t* buffer_from_cl(cl_mem *in_frames);

private:
  // Member order is kept as in the original declaration, since it determines
  // construction and destruction order.
  Transform transform;
  LoadYUVState loadyuv;
  cl_command_queue q;
  cl_mem y_cl;
  cl_mem u_cl;
  cl_mem v_cl;
  cl_mem img_buffer_20hz_cl;
  cl_mem last_img_cl;
  cl_mem input_frames_cl;
  cl_buffer_region region;
  // Host-side byte buffer.
  std::unique_ptr<uint8_t[]> input_frames;
};