mirror of https://github.com/1okko/openpilot.git
134 lines
2.9 KiB
C++
134 lines
2.9 KiB
C++
#pragma once
|
|
|
|
// Fallback for the `__user` annotation: when nothing has defined it yet, make
// it expand to an empty attribute list so declarations that carry the marker
// still compile.
// NOTE(review): presumably required by msm_kgsl.h, which is included further
// down in this header - confirm against that file.
#ifndef __user
#define __user __attribute__(())
#endif
|
|
|
|
#include <cstdint>
|
|
#include <cstdlib>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include <CL/cl.h>
|
|
|
|
#include "third_party/linux/include/msm_kgsl.h"
|
|
|
|
using namespace std;
|
|
|
|
// Declared with the same parameter list and return type as OpenCL's
// clSetKernelArg (kernel, argument index, argument size, pointer to the
// argument value). The definition lives outside this header.
cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
|
|
|
|
// Forward declaration only: lets this header mention json11::Json (as a
// return type) without pulling in the json11 header itself.
namespace json11 {
  class Json;
}
|
|
// Forward declaration: the helper classes below hold Thneed pointers before
// the full class definition appears later in this header.
class Thneed;
|
|
|
|
// Allocation helper whose implementation lives outside this header.
// NOTE(review): judging by the member names it hands out pieces of a region
// starting at `base` and tracks how much is left in `remaining` - confirm
// against the .cc file.
class GPUMalloc {
public:
  // `size` and `fd` are interpreted by the out-of-line constructor.
  // NOTE(review): presumably the region size and a device file descriptor.
  GPUMalloc(int size, int fd);
  ~GPUMalloc();

  // Returns a pointer for a request of `size`; behaviour on exhaustion is
  // defined by the implementation, not visible here.
  void *alloc(int size);

private:
  uint64_t base;   // address-sized value maintained by the implementation
  int remaining;   // counter maintained by the implementation
};
|
|
|
|
class CLQueuedKernel {
|
|
public:
|
|
CLQueuedKernel(Thneed *lthneed) { thneed = lthneed; }
|
|
CLQueuedKernel(Thneed *lthneed,
|
|
cl_kernel _kernel,
|
|
cl_uint _work_dim,
|
|
const size_t *_global_work_size,
|
|
const size_t *_local_work_size);
|
|
cl_int exec();
|
|
void debug_print(bool verbose);
|
|
int get_arg_num(const char *search_arg_name);
|
|
cl_program program;
|
|
string name;
|
|
cl_uint num_args;
|
|
vector<string> arg_names;
|
|
vector<string> arg_types;
|
|
vector<string> args;
|
|
vector<int> args_size;
|
|
cl_kernel kernel = NULL;
|
|
json11::Json to_json() const;
|
|
|
|
cl_uint work_dim;
|
|
size_t global_work_size[3] = {0};
|
|
size_t local_work_size[3] = {0};
|
|
private:
|
|
Thneed *thneed;
|
|
};
|
|
|
|
// Polymorphic base for cached operations that can be replayed via exec().
// The default exec() does nothing; subclasses override it.
class CachedIoctl {
public:
  // Derived objects are owned through base-class smart pointers
  // (see Thneed::cmds), so destruction must dispatch virtually.
  virtual ~CachedIoctl() = default;

  virtual void exec() {}
};
|
|
|
|
class CachedSync: public CachedIoctl {
|
|
public:
|
|
CachedSync(Thneed *lthneed, string ldata) { thneed = lthneed; data = ldata; }
|
|
void exec();
|
|
private:
|
|
Thneed *thneed;
|
|
string data;
|
|
};
|
|
|
|
class CachedCommand: public CachedIoctl {
|
|
public:
|
|
CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
|
|
void exec();
|
|
private:
|
|
void disassemble(int cmd_index);
|
|
struct kgsl_gpu_command cache;
|
|
unique_ptr<kgsl_command_object[]> cmds;
|
|
unique_ptr<kgsl_command_object[]> objs;
|
|
Thneed *thneed;
|
|
vector<shared_ptr<CLQueuedKernel> > kq;
|
|
};
|
|
|
|
class Thneed {
|
|
public:
|
|
Thneed(bool do_clinit=false, cl_context _context = NULL);
|
|
void stop();
|
|
void execute(float **finputs, float *foutput, bool slow=false);
|
|
void wait();
|
|
|
|
vector<cl_mem> input_clmem;
|
|
vector<void *> inputs;
|
|
vector<size_t> input_sizes;
|
|
cl_mem output = NULL;
|
|
|
|
cl_context context = NULL;
|
|
cl_command_queue command_queue;
|
|
cl_device_id device_id;
|
|
int context_id;
|
|
|
|
// protected?
|
|
bool record = false;
|
|
int debug;
|
|
int timestamp;
|
|
|
|
#ifdef QCOM2
|
|
unique_ptr<GPUMalloc> ram;
|
|
vector<unique_ptr<CachedIoctl> > cmds;
|
|
int fd;
|
|
#endif
|
|
|
|
// all CL kernels
|
|
void copy_inputs(float **finputs, bool internal=false);
|
|
void copy_output(float *foutput);
|
|
cl_int clexec();
|
|
vector<shared_ptr<CLQueuedKernel> > kq;
|
|
|
|
// pending CL kernels
|
|
vector<shared_ptr<CLQueuedKernel> > ckq;
|
|
|
|
// loading
|
|
void load(const char *filename);
|
|
private:
|
|
void clinit();
|
|
};
|
|
|