mirror of https://github.com/1okko/openpilot.git
modeld: Move from SNPE to tinygrad (#25207)
* compiling, won't work yet * running with inputs and outputs * there's some magic chance this works * no more dlc, include onnx * yolo tests plz * bump tinygrad * files_common + delete dlc * tinygrad_repo -> tinygrad * pre commit config * llops needed * extra in files_common * bump tinygrad * fix indent * tinygrad/nn/__init__ * tinygrad_repo * bump tinygrad repo * bump tinygrad * bump with native_exp, match maybe * native_explog is argument * pyopencl no cache * 5% chance this matches * work in float32? * bump tinygrad * fix build * no __init__ * fix recip * dumb hack * adding thneed PC support * fix pc segfault * pc thneed is working * to_image * prints stuff with debug=2 * it sort of works * copy host ptr is simpler * bug fix * build on c3 * this correct? * reenable float16 * fix private, fixup copy_inputs internal * bump tinygrad and update ref commit * fix OPTWG on PC * maybe fix non determinism * revert model replay ref commit * comments, init zeroed out buffers * upd ref commit * bump tinygrad to fix initial image * try this ref Co-authored-by: Comma Device <device@comma.ai>
This commit is contained in:
parent
4bb399ba3c
commit
40d6f4b65c
|
@ -36,6 +36,7 @@ a.out
|
|||
config.json
|
||||
clcache
|
||||
compile_commands.json
|
||||
compare_runtime*.html
|
||||
|
||||
persist
|
||||
board/obj/
|
||||
|
|
|
@ -16,3 +16,6 @@
|
|||
[submodule "body"]
|
||||
path = body
|
||||
url = ../../commaai/body.git
|
||||
[submodule "tinygrad"]
|
||||
path = tinygrad_repo
|
||||
url = https://github.com/geohot/tinygrad.git
|
||||
|
|
|
@ -28,7 +28,7 @@ repos:
|
|||
rev: v0.931
|
||||
hooks:
|
||||
- id: mypy
|
||||
exclude: '^(pyextra/)|(cereal/)|(rednose/)|(panda/)|(laika/)|(opendbc/)|(laika_repo/)|(rednose_repo/)/'
|
||||
exclude: '^(pyextra/)|(cereal/)|(rednose/)|(panda/)|(laika/)|(opendbc/)|(laika_repo/)|(rednose_repo/)/|(tinygrad/)|(tinygrad_repo/)'
|
||||
additional_dependencies: ['types-PyYAML', 'lxml', 'numpy', 'types-atomicwrites', 'types-pycurl', 'types-requests', 'types-certifi']
|
||||
args:
|
||||
- --warn-redundant-casts
|
||||
|
@ -40,7 +40,7 @@ repos:
|
|||
rev: 4.0.1
|
||||
hooks:
|
||||
- id: flake8
|
||||
exclude: '^(pyextra/)|(cereal/)|(rednose/)|(panda/)|(laika/)|(opendbc/)|(laika_repo/)|(rednose_repo/)|(selfdrive/debug/)/'
|
||||
exclude: '^(pyextra/)|(cereal/)|(rednose/)|(panda/)|(laika/)|(opendbc/)|(laika_repo/)|(rednose_repo/)|(tinygrad/)|(tinygrad_repo/)|(selfdrive/debug/)/'
|
||||
additional_dependencies: ['flake8-no-implicit-concat']
|
||||
args:
|
||||
- --indent-size=2
|
||||
|
@ -55,7 +55,7 @@ repos:
|
|||
entry: pylint
|
||||
language: system
|
||||
types: [python]
|
||||
exclude: '^(pyextra/)|(cereal/)|(rednose/)|(panda/)|(laika/)|(laika_repo/)|(rednose_repo/)'
|
||||
exclude: '^(pyextra/)|(cereal/)|(rednose/)|(panda/)|(laika/)|(laika_repo/)|(rednose_repo/)|(tinygrad/)|(tinygrad_repo/)'
|
||||
args:
|
||||
- -rn
|
||||
- -sn
|
||||
|
|
|
@ -49,6 +49,11 @@ AddOption('--no-thneed',
|
|||
dest='no_thneed',
|
||||
help='avoid using thneed')
|
||||
|
||||
AddOption('--pc-thneed',
|
||||
action='store_true',
|
||||
dest='pc_thneed',
|
||||
help='use thneed on pc')
|
||||
|
||||
AddOption('--no-test',
|
||||
action='store_false',
|
||||
dest='test',
|
||||
|
|
|
@ -78,7 +78,8 @@ cl_program cl_program_from_file(cl_context ctx, cl_device_id device_id, const ch
|
|||
}
|
||||
|
||||
cl_program cl_program_from_source(cl_context ctx, cl_device_id device_id, const std::string& src, const char* args) {
|
||||
cl_program prg = CL_CHECK_ERR(clCreateProgramWithSource(ctx, 1, (const char*[]){src.c_str()}, NULL, &err));
|
||||
const char *csrc = src.c_str();
|
||||
cl_program prg = CL_CHECK_ERR(clCreateProgramWithSource(ctx, 1, &csrc, NULL, &err));
|
||||
if (int err = clBuildProgram(prg, 1, &device_id, args, NULL, NULL); err != 0) {
|
||||
cl_print_build_errors(prg, device_id);
|
||||
assert(0);
|
||||
|
@ -87,7 +88,7 @@ cl_program cl_program_from_source(cl_context ctx, cl_device_id device_id, const
|
|||
}
|
||||
|
||||
cl_program cl_program_from_binary(cl_context ctx, cl_device_id device_id, const uint8_t* binary, size_t length, const char* args) {
|
||||
cl_program prg = CL_CHECK_ERR(clCreateProgramWithBinary(ctx, 1, &device_id, &length, (const uint8_t*[]){binary}, NULL, &err));
|
||||
cl_program prg = CL_CHECK_ERR(clCreateProgramWithBinary(ctx, 1, &device_id, &length, &binary, NULL, &err));
|
||||
if (int err = clBuildProgram(prg, 1, &device_id, args, NULL, NULL); err != 0) {
|
||||
cl_print_build_errors(prg, device_id);
|
||||
assert(0);
|
||||
|
|
|
@ -78,7 +78,7 @@ find . -name 'moc_*' -delete
|
|||
find . -name '__pycache__' -delete
|
||||
rm -rf panda/board panda/certs panda/crypto
|
||||
rm -rf .sconsign.dblite Jenkinsfile release/
|
||||
rm selfdrive/modeld/models/supercombo.dlc
|
||||
rm selfdrive/modeld/models/supercombo.onnx
|
||||
|
||||
# Move back signed panda fw
|
||||
mkdir -p panda/board/obj
|
||||
|
|
|
@ -352,7 +352,7 @@ selfdrive/modeld/models/driving.cc
|
|||
selfdrive/modeld/models/driving.h
|
||||
selfdrive/modeld/models/dmonitoring.cc
|
||||
selfdrive/modeld/models/dmonitoring.h
|
||||
selfdrive/modeld/models/supercombo.dlc
|
||||
selfdrive/modeld/models/supercombo.onnx
|
||||
selfdrive/modeld/models/dmonitoring_model_q.dlc
|
||||
|
||||
selfdrive/modeld/transforms/loadyuv.cc
|
||||
|
@ -561,3 +561,16 @@ opendbc/vw_mqb_2010.dbc
|
|||
opendbc/tesla_can.dbc
|
||||
opendbc/tesla_radar.dbc
|
||||
opendbc/tesla_powertrain.dbc
|
||||
|
||||
tinygrad_repo/openpilot/compile.py
|
||||
tinygrad_repo/accel/opencl/*
|
||||
tinygrad_repo/extra/onnx.py
|
||||
tinygrad_repo/extra/utils.py
|
||||
tinygrad_repo/tinygrad/llops/ops_gpu.py
|
||||
tinygrad_repo/tinygrad/llops/ops_opencl.py
|
||||
tinygrad_repo/tinygrad/helpers.py
|
||||
tinygrad_repo/tinygrad/mlops.py
|
||||
tinygrad_repo/tinygrad/ops.py
|
||||
tinygrad_repo/tinygrad/shapetracker.py
|
||||
tinygrad_repo/tinygrad/tensor.py
|
||||
tinygrad_repo/tinygrad/nn/__init__.py
|
||||
|
|
|
@ -62,25 +62,65 @@ else:
|
|||
|
||||
common_model = lenv.Object(common_src)
|
||||
|
||||
# build thneed model
|
||||
if use_thneed and arch == "larch64":
|
||||
fn = File("models/supercombo").abspath
|
||||
compiler = lenv.Program('thneed/compile', ["thneed/compile.cc"]+common_model, LIBS=libs)
|
||||
cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} --in {fn}.dlc --out {fn}.thneed --binary --optimize"
|
||||
|
||||
lib_paths = ':'.join(Dir(p).abspath for p in lenv["LIBPATH"])
|
||||
kernel_path = os.path.join(Dir('.').abspath, "thneed", "kernels")
|
||||
cenv = Environment(ENV={'LD_LIBRARY_PATH': f"{lib_paths}:{lenv['ENV']['LD_LIBRARY_PATH']}", 'KERNEL_PATH': kernel_path})
|
||||
|
||||
kernels = [os.path.join(kernel_path, x) for x in os.listdir(kernel_path) if x.endswith(".cl")]
|
||||
cenv.Command(fn + ".thneed", [fn + ".dlc", kernels, compiler], cmd)
|
||||
|
||||
lenv.Program('_dmonitoringmodeld', [
|
||||
"dmonitoringmodeld.cc",
|
||||
"models/dmonitoring.cc",
|
||||
]+common_model, LIBS=libs)
|
||||
|
||||
lenv.Program('_modeld', [
|
||||
# build thneed model
|
||||
if use_thneed and arch == "larch64" or GetOption('pc_thneed'):
|
||||
fn = File("models/supercombo").abspath
|
||||
|
||||
if GetOption('pc_thneed'):
|
||||
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed"
|
||||
else:
|
||||
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && FLOAT16=1 PYOPENCL_NO_CACHE=1 MATMUL=1 NATIVE_EXPLOG=1 OPTWG=1 UNSAFE_FLOAT4=1 DEBUGCL=1 python3 openpilot/compile.py {fn}.onnx {fn}.thneed"
|
||||
|
||||
# is there a better way than listing all of tinygrad?
|
||||
lenv.Command(fn + ".thneed", [fn + ".onnx",
|
||||
"#tinygrad_repo/openpilot/compile.py",
|
||||
"#tinygrad_repo/accel/opencl/conv.cl",
|
||||
"#tinygrad_repo/accel/opencl/matmul.cl",
|
||||
"#tinygrad_repo/accel/opencl/ops_opencl.py",
|
||||
"#tinygrad_repo/accel/opencl/preprocessing.py",
|
||||
"#tinygrad_repo/extra/onnx.py",
|
||||
"#tinygrad_repo/extra/utils.py",
|
||||
"#tinygrad_repo/tinygrad/llops/ops_gpu.py",
|
||||
"#tinygrad_repo/tinygrad/llops/ops_opencl.py",
|
||||
"#tinygrad_repo/tinygrad/helpers.py",
|
||||
"#tinygrad_repo/tinygrad/mlops.py",
|
||||
"#tinygrad_repo/tinygrad/ops.py",
|
||||
"#tinygrad_repo/tinygrad/shapetracker.py",
|
||||
"#tinygrad_repo/tinygrad/tensor.py",
|
||||
"#tinygrad_repo/tinygrad/nn/__init__.py"
|
||||
], cmd)
|
||||
|
||||
# old thneed compiler. TODO: remove this once tinygrad stuff is stable
|
||||
|
||||
#compiler = lenv.Program('thneed/compile', ["thneed/compile.cc"]+common_model, LIBS=libs)
|
||||
#cmd = f"cd {Dir('.').abspath} && {compiler[0].abspath} --in {fn}.dlc --out {fn}.thneed --binary --optimize"
|
||||
|
||||
#lib_paths = ':'.join(Dir(p).abspath for p in lenv["LIBPATH"])
|
||||
#kernel_path = os.path.join(Dir('.').abspath, "thneed", "kernels")
|
||||
#cenv = Environment(ENV={'LD_LIBRARY_PATH': f"{lib_paths}:{lenv['ENV']['LD_LIBRARY_PATH']}", 'KERNEL_PATH': kernel_path})
|
||||
|
||||
#kernels = [os.path.join(kernel_path, x) for x in os.listdir(kernel_path) if x.endswith(".cl")]
|
||||
#cenv.Command(fn + ".thneed", [fn + ".dlc", kernels, compiler], cmd)
|
||||
|
||||
llenv = lenv.Clone()
|
||||
if GetOption('pc_thneed'):
|
||||
pc_thneed_src = [
|
||||
"thneed/thneed_common.cc",
|
||||
"thneed/thneed_pc.cc",
|
||||
"thneed/serialize.cc",
|
||||
"runners/thneedmodel.cc",
|
||||
]
|
||||
llenv['CFLAGS'].append("-DUSE_THNEED")
|
||||
llenv['CXXFLAGS'].append("-DUSE_THNEED")
|
||||
common_model += llenv.Object(pc_thneed_src)
|
||||
libs += ['dl']
|
||||
|
||||
llenv.Program('_modeld', [
|
||||
"modeld.cc",
|
||||
"models/driving.cc",
|
||||
]+common_model, LIBS=libs + transformations)
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:93d265fc88f05746ce47257e15fc2afe43b250b44715313049f829e8aa30a9d6
|
||||
size 94302331
|
|
@ -33,11 +33,14 @@ void Thneed::load(const char *filename) {
|
|||
assert(mobj["needs_load"].bool_value() == false);
|
||||
} else {
|
||||
if (mobj["needs_load"].bool_value()) {
|
||||
//printf("loading %p %d @ 0x%X\n", clbuf, sz, ptr);
|
||||
clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, &buf[ptr], NULL);
|
||||
if (debug >= 1) printf("loading %p %d @ 0x%X\n", clbuf, sz, ptr);
|
||||
ptr += sz;
|
||||
} else {
|
||||
clbuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sz, NULL, NULL);
|
||||
// TODO: is there a faster way to init zeroed out buffers?
|
||||
void *host_zeros = calloc(sz, 1);
|
||||
clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, host_zeros, NULL);
|
||||
free(host_zeros);
|
||||
}
|
||||
}
|
||||
assert(clbuf != NULL);
|
||||
|
|
|
@ -122,7 +122,7 @@ class Thneed {
|
|||
|
||||
// all CL kernels
|
||||
void find_inputs_outputs();
|
||||
void copy_inputs(float **finputs);
|
||||
void copy_inputs(float **finputs, bool internal=false);
|
||||
void copy_output(float *foutput);
|
||||
cl_int clexec();
|
||||
vector<shared_ptr<CLQueuedKernel> > kq;
|
||||
|
|
|
@ -30,17 +30,16 @@ cl_int Thneed::clexec() {
|
|||
return clFinish(command_queue);
|
||||
}
|
||||
|
||||
void Thneed::copy_inputs(float **finputs) {
|
||||
//cl_int ret;
|
||||
void Thneed::copy_inputs(float **finputs, bool internal) {
|
||||
for (int idx = 0; idx < inputs.size(); ++idx) {
|
||||
if (debug >= 1) printf("copying %lu -- %p -> %p (cl %p)\n", input_sizes[idx], finputs[idx], inputs[idx], input_clmem[idx]);
|
||||
|
||||
// TODO: fix thneed caching
|
||||
if (finputs[idx] != NULL) memcpy(inputs[idx], finputs[idx], input_sizes[idx]);
|
||||
//if (finputs[idx] != NULL) CL_CHECK(clEnqueueWriteBuffer(command_queue, input_clmem[idx], CL_TRUE, 0, input_sizes[idx], finputs[idx], 0, NULL, NULL));
|
||||
|
||||
// HACK
|
||||
//if (input_sizes[idx] == 16) memset((char*)inputs[idx] + 8, 0, 8);
|
||||
if (internal) {
|
||||
// if it's internal, using memcpy is fine since the buffer sync is cached in the ioctl layer
|
||||
if (finputs[idx] != NULL) memcpy(inputs[idx], finputs[idx], input_sizes[idx]);
|
||||
} else {
|
||||
if (finputs[idx] != NULL) CL_CHECK(clEnqueueWriteBuffer(command_queue, input_clmem[idx], CL_TRUE, 0, input_sizes[idx], finputs[idx], 0, NULL, NULL));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -202,8 +201,8 @@ void CLQueuedKernel::debug_print(bool verbose) {
|
|||
assert(slice_pitch == 0);
|
||||
|
||||
clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL);
|
||||
size_t sz;
|
||||
clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
|
||||
size_t sz = 0;
|
||||
if (buf != NULL) clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
|
||||
printf(" image %zu x %zu rp %zu @ %p buffer %zu", width, height, row_pitch, buf, sz);
|
||||
} else {
|
||||
size_t sz;
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#include "selfdrive/modeld/thneed/thneed.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include "common/clutil.h"
|
||||
#include "common/timing.h"
|
||||
|
||||
Thneed::Thneed(bool do_clinit, cl_context _context) {
|
||||
context = _context;
|
||||
if (do_clinit) clinit();
|
||||
char *thneed_debug_env = getenv("THNEED_DEBUG");
|
||||
debug = (thneed_debug_env != NULL) ? atoi(thneed_debug_env) : 0;
|
||||
}
|
||||
|
||||
void Thneed::execute(float **finputs, float *foutput, bool slow) {
|
||||
uint64_t tb, te;
|
||||
if (debug >= 1) tb = nanos_since_boot();
|
||||
|
||||
// ****** copy inputs
|
||||
copy_inputs(finputs);
|
||||
|
||||
// ****** run commands
|
||||
clexec();
|
||||
|
||||
// ****** copy outputs
|
||||
copy_output(foutput);
|
||||
|
||||
if (debug >= 1) {
|
||||
te = nanos_since_boot();
|
||||
printf("model exec in %lu us\n", (te-tb)/1000);
|
||||
}
|
||||
}
|
||||
|
||||
void Thneed::stop() {
|
||||
}
|
||||
|
||||
void Thneed::find_inputs_outputs() {
|
||||
// thneed on PC doesn't work on old style inputs/outputs
|
||||
}
|
||||
|
|
@ -269,7 +269,7 @@ void Thneed::execute(float **finputs, float *foutput, bool slow) {
|
|||
if (debug >= 1) tb = nanos_since_boot();
|
||||
|
||||
// ****** copy inputs
|
||||
copy_inputs(finputs);
|
||||
copy_inputs(finputs, true);
|
||||
|
||||
// ****** set power constraint
|
||||
int ret;
|
||||
|
|
|
@ -1 +1 @@
|
|||
ca90e11f8d59902af38d3785ddd91a27d0fbb411
|
||||
cffb4e720b0379bedd4ff802912d998ace775c37
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 2e9b7637b3c3c8895fda9f964215db3a35fe3441
|
Loading…
Reference in New Issue