mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-02-18 17:43:54 +08:00
Cleanup shift (#33677)
* Cleanup shift code * Unsafe buffer copy * fix bugs
This commit is contained in:
@@ -13,7 +13,7 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
|
||||
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
|
||||
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
|
||||
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err));
|
||||
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, frame_size_bytes, NULL, &err));
|
||||
|
||||
transform_init(&transform, context, device_id);
|
||||
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
|
||||
@@ -24,15 +24,16 @@ uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, i
|
||||
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
|
||||
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
|
||||
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
|
||||
if (output == NULL) {
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
|
||||
|
||||
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], frame_size_bytes);
|
||||
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
|
||||
clFinish(q);
|
||||
return &input_frames[0];
|
||||
} else {
|
||||
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true);
|
||||
copy_queue(&loadyuv, q, *output, *output, frame_size_bytes, 0, frame_size_bytes);
|
||||
copy_queue(&loadyuv, q, net_input_cl, *output, 0, frame_size_bytes, frame_size_bytes);
|
||||
|
||||
// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
|
||||
clFinish(q);
|
||||
return NULL;
|
||||
|
||||
@@ -26,6 +26,7 @@ public:
|
||||
const int MODEL_HEIGHT = 256;
|
||||
const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
|
||||
const int buf_size = MODEL_FRAME_SIZE * 2;
|
||||
const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);
|
||||
|
||||
private:
|
||||
Transform transform;
|
||||
|
||||
@@ -33,17 +33,8 @@ void loadyuv_destroy(LoadYUVState* s) {
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl, bool do_shift) {
|
||||
cl_mem out_cl) {
|
||||
cl_int global_out_off = 0;
|
||||
if (do_shift) {
|
||||
// shift the image in slot 1 to slot 0, then place the new image in slot 1
|
||||
global_out_off += (s->width*s->height) + (s->width/2)*(s->height/2)*2;
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &out_cl));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_int), &global_out_off));
|
||||
const size_t copy_work_size = global_out_off/8;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
|
||||
©_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 0, sizeof(cl_mem), &y_cl));
|
||||
CL_CHECK(clSetKernelArg(s->loadys_krnl, 1, sizeof(cl_mem), &out_cl));
|
||||
@@ -72,3 +63,14 @@ void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->loaduv_krnl, 1, NULL,
|
||||
&loaduv_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size) {
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 0, sizeof(cl_mem), &src));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 1, sizeof(cl_mem), &dst));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 2, sizeof(cl_int), &src_offset));
|
||||
CL_CHECK(clSetKernelArg(s->copy_krnl, 3, sizeof(cl_int), &dst_offset));
|
||||
const size_t copy_work_size = size/8;
|
||||
CL_CHECK(clEnqueueNDRangeKernel(q, s->copy_krnl, 1, NULL,
|
||||
©_work_size, NULL, 0, 0, NULL));
|
||||
}
|
||||
@@ -37,9 +37,11 @@ __kernel void loaduv(__global uchar8 const * const in,
|
||||
out[gid + out_offset / 8] = inv;
|
||||
}
|
||||
|
||||
__kernel void copy(__global uchar8 * inout,
|
||||
int in_offset)
|
||||
__kernel void copy(__global uchar8 * in,
|
||||
__global uchar8 * out,
|
||||
int in_offset,
|
||||
int out_offset)
|
||||
{
|
||||
const int gid = get_global_id(0);
|
||||
inout[gid] = inout[gid + in_offset / 8];
|
||||
out[gid + out_offset / 8] = in[gid + in_offset / 8];
|
||||
}
|
||||
|
||||
@@ -13,4 +13,8 @@ void loadyuv_destroy(LoadYUVState* s);
|
||||
|
||||
void loadyuv_queue(LoadYUVState* s, cl_command_queue q,
|
||||
cl_mem y_cl, cl_mem u_cl, cl_mem v_cl,
|
||||
cl_mem out_cl, bool do_shift = false);
|
||||
cl_mem out_cl);
|
||||
|
||||
|
||||
void copy_queue(LoadYUVState* s, cl_command_queue q, cl_mem src, cl_mem dst,
|
||||
size_t src_offset, size_t dst_offset, size_t size);
|
||||
Reference in New Issue
Block a user