From b1ba78ac3880d44ae2c1403d9fc2852adf0c0ba1 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Sun, 5 Mar 2023 11:21:12 -0800 Subject: [PATCH] move applegpu disassembler --- .gitignore | 2 +- test/external/external_test_gpu_ast.py | 6 +++--- test/test_specific_conv.py | 13 +++++++++++++ tinygrad/runtime/ops_metal.py | 4 ++-- 4 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 test/test_specific_conv.py diff --git a/.gitignore b/.gitignore index 87e1d496..386263bc 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,4 @@ pandecode.dump vertex.bin recognize* .idea -applegpu +disassemblers/applegpu diff --git a/test/external/external_test_gpu_ast.py b/test/external/external_test_gpu_ast.py index 81d32818..e6f862ba 100644 --- a/test/external/external_test_gpu_ast.py +++ b/test/external/external_test_gpu_ast.py @@ -154,9 +154,9 @@ class TestAST(unittest.TestCase): buf1 = GPUBuffer(shape=ShapeTracker(shape=(1, 32, 64, 1, 1, 6, 4, 1, 1, 24, 4), views=[View((1, 32, 64, 1, 1, 6, 4, 1, 1, 24, 4), (0, 0, 0, 0, 0, 384, 4, 0, 0, 16, 1), 0)]), hostbuf=GPUBuffer(shape=(6, 96, 4), force_create=True)) op0 = LazyOp(BinaryOps.MUL, (buf0,buf1,), None) op1 = LazyOp(ReduceOps.SUM, (op0,), (1, 32, 64, 1, 1, 6, 4, 1, 1, 1, 1)) - buf2 = GPUBuffer(shape=ShapeTracker(shape=(1, 32, 64, 1, 1, 6, 4, 1, 1, 1, 1), views=[View((1, 32, 64, 1, 1, 6, 4, 1, 1, 1, 1), (0, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0), 0)]), hostbuf=GPUBuffer(shape=(24,), force_create=True)) - op2 = LazyOp(BinaryOps.ADD, (op1,buf2,), None) - ast = LazyOp(MovementOps.RESHAPE, (op2,), (32, 384, 4)) + #buf2 = GPUBuffer(shape=ShapeTracker(shape=(1, 32, 64, 1, 1, 6, 4, 1, 1, 1, 1), views=[View((1, 32, 64, 1, 1, 6, 4, 1, 1, 1, 1), (0, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0), 0)]), hostbuf=GPUBuffer(shape=(24,), force_create=True)) + #op2 = LazyOp(BinaryOps.ADD, (op1,buf2,), None) + ast = LazyOp(MovementOps.RESHAPE, (op1,), (32, 384, 4)) compile_and_test_ast(ast, (6, 4, 8)) def test_full_reduce_op(self): diff --git 
a/test/test_specific_conv.py b/test/test_specific_conv.py new file mode 100644 index 00000000..4bb345e7 --- /dev/null +++ b/test/test_specific_conv.py @@ -0,0 +1,13 @@ +import unittest +from tinygrad.tensor import Tensor +# similar to test/external/external_test_gpu_ast.py, but universal + +# 1x1 6 <- 24 +class TestSpecificConv(unittest.TestCase): + def test_1x1_6_24(self): + x = Tensor.randn(1,24*4,32,64) + w = Tensor.randn(6*4,24*4,1,1) + x.conv2d(w).permute(0,2,3,1).reshape(32, 384, 4).contiguous().realize() + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/tinygrad/runtime/ops_metal.py b/tinygrad/runtime/ops_metal.py index 39d0b28d..e37b46a7 100644 --- a/tinygrad/runtime/ops_metal.py +++ b/tinygrad/runtime/ops_metal.py @@ -53,8 +53,8 @@ class MetalProgram: desc.setComputeFunction_(self.fxn) unwrap(arc.addComputePipelineFunctionsWithDescriptor_error_(desc, None)) unwrap(arc.serializeToURL_error_(Cocoa.NSURL.URLWithString_("file:///tmp/shader.bin"), None)) - # clone https://github.com/dougallj/applegpu.git in the root of tinygrad - os.system(f"cd {pathlib.Path(__file__).parent.parent.parent}/applegpu && python3 compiler_explorer.py /tmp/shader.bin") + # clone https://github.com/dougallj/applegpu.git into disassemblers/ at the root of the tinygrad repo (not inside the tinygrad/ package) + os.system(f"cd {pathlib.Path(__file__).parent.parent.parent}/disassemblers/applegpu && python3 compiler_explorer.py /tmp/shader.bin") self.pipeline_state = unwrap(METAL.device.newComputePipelineStateWithFunction_error_(self.fxn, None)) def __call__(self, global_size, local_size, *bufs, wait=False):