compile fixes

This commit is contained in:
Comma Device 2024-11-11 15:58:27 -05:00
parent 408c0a5e7f
commit ad119af6a5
1 changed files with 17 additions and 8 deletions

View File

@ -5,8 +5,8 @@ if "IMAGE" not in os.environ: os.environ["IMAGE"] = "2"
if "NOLOCALS" not in os.environ: os.environ["NOLOCALS"] = "1" if "NOLOCALS" not in os.environ: os.environ["NOLOCALS"] = "1"
if "JIT_BATCH_SIZE" not in os.environ: os.environ["JIT_BATCH_SIZE"] = "0" if "JIT_BATCH_SIZE" not in os.environ: os.environ["JIT_BATCH_SIZE"] = "0"
from tinygrad import fetch, Tensor, TinyJit, Device, Context, GlobalCounters from tinygrad import fetch, Tensor, TinyJit, Context, GlobalCounters
from tinygrad.helpers import OSX, DEBUG, getenv from tinygrad.helpers import DEBUG, getenv
from tinygrad.tensor import _from_np_dtype from tinygrad.tensor import _from_np_dtype
import onnx import onnx
@ -14,14 +14,14 @@ from onnx.helper import tensor_dtype_to_np_dtype
from extra.onnx import get_run_onnx # TODO: port to main tinygrad from extra.onnx import get_run_onnx # TODO: port to main tinygrad
OPENPILOT_MODEL = sys.argv[1] if len(sys.argv) > 1 else "https://github.com/commaai/openpilot/raw/v0.9.7/selfdrive/modeld/models/supercombo.onnx" OPENPILOT_MODEL = sys.argv[1] if len(sys.argv) > 1 else "https://github.com/commaai/openpilot/raw/v0.9.7/selfdrive/modeld/models/supercombo.onnx"
OUTPUT = "/tmp/openpilot.pkl" OUTPUT = sys.argv[2] if len(sys.argv) > 2 else "/tmp/openpilot.pkl"
def compile(): def compile():
# hack to fix GPU on OSX: max doesn't work on half, see test/external/external_gpu_fail_osx.py # hack to fix GPU on OSX: max doesn't work on half, see test/external/external_gpu_fail_osx.py
if OSX: #if OSX:
from tinygrad.ops import BinaryOps # from tinygrad.ops import BinaryOps
from tinygrad.renderer.cstyle import ClangRenderer, CStyleLanguage # from tinygrad.renderer.cstyle import ClangRenderer, CStyleLanguage
CStyleLanguage.code_for_op[BinaryOps.MAX] = ClangRenderer.code_for_op[BinaryOps.MAX] # CStyleLanguage.code_for_op[BinaryOps.MAX] = ClangRenderer.code_for_op[BinaryOps.MAX]
Tensor.no_grad = True Tensor.no_grad = True
Tensor.training = False Tensor.training = False
@ -32,7 +32,12 @@ def compile():
print("loaded model") print("loaded model")
input_shapes = {inp.name:tuple(x.dim_value for x in inp.type.tensor_type.shape.dim) for inp in onnx_model.graph.input} input_shapes = {inp.name:tuple(x.dim_value for x in inp.type.tensor_type.shape.dim) for inp in onnx_model.graph.input}
input_types = {inp.name: tensor_dtype_to_np_dtype(inp.type.tensor_type.elem_type) for inp in onnx_model.graph.input} input_types = {inp.name: np.float32 for inp in onnx_model.graph.input}
if 'input_img' in input_shapes:
input_types['input_img'] = np.uint8
else:
input_types['input_imgs'] = np.uint8
input_types['big_input_imgs'] = np.uint8
Tensor.manual_seed(100) Tensor.manual_seed(100)
new_inputs = {k:Tensor.randn(*shp, dtype=_from_np_dtype(input_types[k])).mul(8).realize() for k,shp in sorted(input_shapes.items())} new_inputs = {k:Tensor.randn(*shp, dtype=_from_np_dtype(input_types[k])).mul(8).realize() for k,shp in sorted(input_shapes.items())}
print("created tensors") print("created tensors")
@ -65,7 +70,11 @@ def test(test_val=None):
new_inputs = {nm:Tensor.randn(*st.shape, dtype=dtype).mul(8).realize() for nm, (st, _, dtype, _) in new_inputs = {nm:Tensor.randn(*st.shape, dtype=dtype).mul(8).realize() for nm, (st, _, dtype, _) in
sorted(zip(run.captured.expected_names, run.captured.expected_st_vars_dtype_device))} sorted(zip(run.captured.expected_names, run.captured.expected_st_vars_dtype_device))}
for _ in range(20): for _ in range(20):
inputs_numpy = {k:v.numpy() for k,v in new_inputs.items()}
st = time.perf_counter() st = time.perf_counter()
for k in new_inputs:
if 'img' not in k: # dont need to init img tensors, those are backed by openCL GPU memory
new_inputs[k] = Tensor(inputs_numpy[k])
out = run(**new_inputs) out = run(**new_inputs)
mt = time.perf_counter() mt = time.perf_counter()
val = out['outputs'].numpy() val = out['outputs'].numpy()