mirror of
https://github.com/sunnypilot/sunnypilot.git
synced 2026-02-21 20:03:55 +08:00
* bigmodel * more debug print * debugging bigmodel * remove the tanh, debugging * print images/buffers * disassemble the command queues * decompiler * dump the shaders * full disasm * support patching kernel and fixing convolution_horizontal_reduced_reads_1x1 * microbenchmark * 42 GFLOPS, 1 GB/s * gemm benchmark * 75 GFLOPS vs 42 GFLOPS * 115 GFLOPS * oops, never mind * gemm image is slow * this is pretty hopeless * gemm image gets 62 GFLOPS * this is addictive and still a waste of time * cleanup cleanup * that hook was dumb * tabbing * more tabbing Co-authored-by: Comma Device <device@comma.ai>
52 lines
992 B
C++
52 lines
992 B
C++
#pragma once

#include <stdint.h>
#include "include/msm_kgsl.h"
#include <utility>
#include <vector>
#include <CL/cl.h>
class Thneed;
// Allocator handle constructed from a size and a file descriptor.
// NOTE(review): the implementation is not in this header; the `base` /
// `remaining` pair suggests a bump-style allocator over one region —
// confirm against the .cc file.
class GPUMalloc {
 public:
  // size: requested size; fd: file descriptor used by the implementation.
  // NOTE(review): units and the kind of descriptor are defined out of line.
  GPUMalloc(int size, int fd);

  // Returns a pointer for a request of `size`.
  // NOTE(review): alignment and out-of-space behaviour are not visible here.
  void *alloc(int size);

 private:
  // Deterministic defaults; the out-of-line constructor is expected to
  // assign the real values.
  uint64_t base = 0;
  int remaining = 0;
};
class CachedCommand {
|
|
public:
|
|
CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
|
|
void exec(bool wait);
|
|
void disassemble();
|
|
private:
|
|
struct kgsl_gpu_command cache;
|
|
struct kgsl_command_object cmds[2];
|
|
struct kgsl_command_object objs[1];
|
|
Thneed *thneed;
|
|
};
class Thneed {
|
|
public:
|
|
Thneed();
|
|
void stop();
|
|
void execute(float **finputs, float *foutput, bool slow=false);
|
|
|
|
std::vector<cl_mem> inputs;
|
|
cl_mem output;
|
|
|
|
cl_command_queue command_queue;
|
|
int context_id;
|
|
|
|
// protected?
|
|
int record;
|
|
int timestamp;
|
|
GPUMalloc *ram;
|
|
std::vector<CachedCommand *> cmds;
|
|
std::vector<std::pair<int, struct kgsl_gpuobj_sync_obj *> > syncobjs;
|
|
int fd;
|
|
};