mirror of https://github.com/commaai/tinygrad.git
GEMM kernel search
This commit is contained in:
parent
123993156d
commit
6d5e1a8029
|
@ -132,6 +132,12 @@ if __name__ == "__main__":
|
|||
op0 = LazyOp(BinaryOps.MUL, (buf0,buf1,), None)
|
||||
op1 = LazyOp(ReduceOps.SUM, (op0,), (8, 1, 32, 112, 112, 1, 1, 1))
|
||||
ast = LazyOp(MovementOps.RESHAPE, (op1,), (8, 32, 112, 112))
|
||||
elif int(os.getenv("GEMM", "0")):
|
||||
buf0 = GPUBuffer(shape=ShapeTracker(shape=(1, 1, 512, 512, 1, 1, 1, 512), views=[View((1, 512, 512, 1), (0, 1, 512, 0), 0), View((1, 1, 512, 512, 1, 1, 1, 512), (0, 0, 0, 1, 0, 0, 0, 512), 0)]), hostbuf=GPUBuffer(shape=(512, 512), force_create=True))
|
||||
buf1 = GPUBuffer(shape=ShapeTracker(shape=(1, 1, 512, 512, 1, 1, 1, 512), views=[View((1, 1, 512, 512, 1, 1, 1, 512), (0, 0, 1, 0, 0, 0, 0, 512), 0)]), hostbuf=GPUBuffer(shape=(512, 512), force_create=True))
|
||||
op0 = LazyOp(BinaryOps.MUL, (buf0,buf1,), None)
|
||||
op1 = LazyOp(ReduceOps.SUM, (op0,), (1, 1, 512, 512, 1, 1, 1, 1))
|
||||
ast = LazyOp(MovementOps.RESHAPE, (op1,), (512, 512))
|
||||
else:
|
||||
# reduce
|
||||
buf0 = GPUBuffer(shape=ShapeTracker(shape=(3, 1, 32, 3, 3, 32, 112, 112), views=[View((3, 32, 225, 225), (50176, 150528, 224, 1), 0), ZeroView((3, 32, 224, 224), ((0, 3), (0, 32), (0, 225), (0, 225))), View((3, 1, 32, 3, 3, 32, 112, 112), (1620000, 1620000, 0, 225, 1, 50625, 450, 2), 0)]), hostbuf=GPUBuffer(shape=(32, 3, 224, 224), force_create=True))
|
||||
|
|
|
@ -98,6 +98,7 @@ class CLProgram:
|
|||
(str() if DEBUG <= 1 or CL.CACHE is not None else f"tm {et/1e3:9.2f}us/{CL.time_sum/1e6:9.2f}ms ({self.op_estimate/et:8.2f} GFLOPS)"))
|
||||
GlobalCounters.global_ops += self.op_estimate
|
||||
GlobalCounters.global_mem += sum([x.size//4 for x in args[2:] if isinstance(x, cl.Buffer)])
|
||||
return e if CL.CACHE is None else None
|
||||
|
||||
# **** end CL wrappers ****
|
||||
|
||||
|
|
Loading…
Reference in New Issue