mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-03-01 17:24:09 +08:00
* squash * bump tg * bump tg * debump tinygrad * bump tinygrad * bump tg * Skip init iteration * fixes * cleanups * skip first test sample * typos * linter unhappy * update cpu usage * OPENCL just zeros for now * imports * Try printing * Runs again, but slower * unused import * Allow more buffer with tg and all on gpu * bump tinygrad * seems ok * stricter timings for driving looser for dm * try llvm * check nvidia * More timeout for now * make test pass * Revert "try llvm" This reverts commit ef136e478320101fea262bae3579e558da991902. * small fixes * whitespace * revert test timeout * No model runners * Always CPU always fast * No onnx runtime GPU * more cores * cleanup * Is this faster * Is this faster * at least runs * FP32 is faster than 16 * fix deps * whitespace * comment --------- Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>
40 lines
1.0 KiB
C++
40 lines
1.0 KiB
C++
#pragma once
|
|
|
|
#include <cfloat>
|
|
#include <cstdlib>
|
|
|
|
#include <memory>
|
|
|
|
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
|
#ifdef __APPLE__
|
|
#include <OpenCL/cl.h>
|
|
#else
|
|
#include <CL/cl.h>
|
|
#endif
|
|
|
|
#include "common/mat.h"
|
|
#include "selfdrive/modeld/transforms/loadyuv.h"
|
|
#include "selfdrive/modeld/transforms/transform.h"
|
|
|
|
class ModelFrame {
|
|
public:
|
|
ModelFrame(cl_device_id device_id, cl_context context);
|
|
~ModelFrame();
|
|
cl_mem* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform);
|
|
uint8_t* buffer_from_cl(cl_mem *in_frames);
|
|
|
|
const int MODEL_WIDTH = 512;
|
|
const int MODEL_HEIGHT = 256;
|
|
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
|
const int buf_size = MODEL_FRAME_SIZE * 2;
|
|
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
|
|
|
|
private:
|
|
Transform transform;
|
|
LoadYUVState loadyuv;
|
|
cl_command_queue q;
|
|
cl_mem y_cl, u_cl, v_cl, img_buffer_20hz_cl, last_img_cl, input_frames_cl;
|
|
cl_buffer_region region;
|
|
std::unique_ptr<uint8_t[]> input_frames;
|
|
};
|