sunnypilot/selfdrive/modeld/thneed/thneed.h
Commit 78a352a8ca by George Hotz: This isn't bigmodel, but there's a lot of good stuff here (#1532)
* bigmodel

* more debug print

* debugging bigmodel

* remove the tanh, debugging

* print images/buffers

* disassemble the command queues

* decompiler

* dump the shaders

* full disasm

* support patching kernel and fixing convolution_horizontal_reduced_reads_1x1

* microbenchmark

* 42 GFLOPS, 1 GB/s

* gemm benchmark

* 75 GFLOPS vs 42 GFLOPS

* 115 GFLOPS

* oops, never mind

* gemm image is slow

* this is pretty hopeless

* gemm image gets 62 GFLOPS

* this is addictive and still a waste of time

* cleanup cleanup

* that hook was dumb

* tabbing

* more tabbing

Co-authored-by: Comma Device <device@comma.ai>
2020-05-17 23:13:17 -07:00

52 lines · 992 B · C++

#pragma once

#include <stdint.h>
#include "include/msm_kgsl.h"

#include <vector>
#include <CL/cl.h>

class Thneed;

// Hands out chunks of a single GPU allocation obtained through the kgsl fd.
class GPUMalloc {
  public:
    GPUMalloc(int size, int fd);
    void *alloc(int size);
  private:
    uint64_t base;
    int remaining;
};

// One recorded kgsl GPU command, kept so it can be replayed or disassembled.
class CachedCommand {
  public:
    CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
    void exec(bool wait);
    void disassemble();
  private:
    struct kgsl_gpu_command cache;
    struct kgsl_command_object cmds[2];
    struct kgsl_command_object objs[1];
    Thneed *thneed;
};

// Captures the GPU command stream of one model run and replays it on demand.
class Thneed {
  public:
    Thneed();
    void stop();
    void execute(float **finputs, float *foutput, bool slow=false);

    std::vector<cl_mem> inputs;
    cl_mem output;

    cl_command_queue command_queue;
    int context_id;

    // protected?
    int record;
    int timestamp;
    GPUMalloc *ram;
    std::vector<CachedCommand *> cmds;
    std::vector<std::pair<int, struct kgsl_gpuobj_sync_obj *> > syncobjs;
    int fd;
};
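
For orientation, here is a minimal usage sketch suggested by the declarations above: record the GPU command stream while the model runs once normally, call stop(), then replay the cached commands with execute(). The helper run_model_once(), the input count, and the buffer sizes are hypothetical placeholders, not part of this file or repository.

// Sketch only: run_model_once() and the buffer sizes below are made-up
// placeholders standing in for the real model runner.
#include "thneed.h"

extern void run_model_once();  // hypothetical: enqueues the model's OpenCL kernels once

int main() {
  Thneed thneed;                  // while thneed.record is set, kgsl commands are captured

  run_model_once();               // first run executes normally and gets recorded
  thneed.stop();                  // stop recording; cmds now holds the cached command stream

  float input0[128] = {};         // placeholder input; real sizes come from the model
  float *inputs[1] = {input0};
  float output[1024] = {};        // placeholder output buffer

  thneed.execute(inputs, output); // later runs replay the cached GPU commands
  return 0;
}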