From d894acbb5027662c1aa9a5786f6ab26374b9c5e5 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Wed, 12 Jun 2024 16:50:15 +0800 Subject: [PATCH] remove hardcoded -1s referencing late reduce (#4926) --- tinygrad/codegen/linearizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tinygrad/codegen/linearizer.py b/tinygrad/codegen/linearizer.py index 547f4985..9e00f309 100644 --- a/tinygrad/codegen/linearizer.py +++ b/tinygrad/codegen/linearizer.py @@ -223,7 +223,7 @@ class Linearizer(Kernel): loop_ctx = self.render_loop(reduce_idxs, 2) # define accumulator - modify idxs if necessary for TC - out_buf = -(self.reduceops.index(reduceop)+1) if self.group_for_reduces else 0 + out_buf = -len(self.reduceops)+self.reduceops.index(reduceop) if self.group_for_reduces else 0 accs[reduceop] = self.global_load(out_buf, global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs, acc=reduceop, loop_ctx=loop_ctx) # store local aliases @@ -299,10 +299,10 @@ class Linearizer(Kernel): accs[reduceop] = self.global_load(0, fake_global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs, acc=reduceop, loop_ctx=loop_ctx) # load localbufs - loaded_buffers[self.bufs[-1]] = self.global_load(-1, fake_global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs, barrier=barrier) + loaded_buffers[self.bufs[out_buf]] = self.global_load(-1, fake_global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs, barrier=barrier) # there's no AST here (and there's no shape for the reduce LazyOp) - self.ast_parse(LazyOp(reduceop.op, (LazyOp(BufferOps.LOAD, (), self.bufs[-1]),)),\ + self.ast_parse(LazyOp(reduceop.op, (LazyOp(BufferOps.LOAD, (), self.bufs[out_buf]),)),\ accs, self.acc_offsets(-1), loaded_buffers, reduce_acc=accs[reduceop]) # end the late reduce loop