mirror of https://github.com/commaai/tinygrad.git
harmless to intertwine the math and the stores
This commit is contained in:
parent
708215d06b
commit
f1196984e6
|
@@ -281,7 +281,7 @@ class Thneed:
|
|||
for i, ((prg, args), e) in enumerate(zip(self.cl_cache, events)):
|
||||
runtime = (e.profile.end - e.profile.start)
|
||||
print(f"{i:3d} time {total_runtime/1e6:5.2f} ms running {prg.name:20s} with {str(args[0]):15s} {str(args[1]):15s} count {len(args)-2:2d} runtime {runtime/1e3:7.2f} us {(prg.op_estimate)/runtime:9.2f} GFLOPS {prg.options} -> {args[2].shape if hasattr(args[2], 'shape') else args[2].size}")
|
||||
if DEBUGCL >= 2 and int(os.getenv("PRINT_KERNEL", "-1")) == i:
|
||||
if (DEBUGCL >= 2 and int(os.getenv("PRINT_KERNEL", "-1")) == i) or DEBUGCL >= 3:
|
||||
print(prg.prg)
|
||||
total_runtime += runtime
|
||||
print(f"total runtime: {total_runtime/1e6:.2f} ms wall time: {et*1000.0:.2f} ms")
|
||||
|
|
|
@@ -354,13 +354,7 @@ class CLASTKernel(ASTKernel):
|
|||
self.kernel.append(f"for (int mid = 0; mid < {self.group_for_reduce}; mid++) {{ {accumulators[0].tok} += vload4(0, &temp[mid*4]); }}\n")
|
||||
|
||||
# late ast
|
||||
outs : List[Token] = []
|
||||
out : List[Token] = self.ast_parse(self.ast, reduce=accumulators)
|
||||
for i, out in enumerate(out):
|
||||
self.kernel.append(f"{out.decltype()} outs{i} = {out.tok};\n")
|
||||
outs.append(Token(f"outs{i}", out.typ))
|
||||
|
||||
self.store(0, outs)
|
||||
self.store(0, self.ast_parse(self.ast, reduce=accumulators))
|
||||
if self.group_for_reduce: self.kernel.append("}")
|
||||
self.kernel.append("}")
|
||||
|
||||
|
|
Loading…
Reference in New Issue