From 9145ad52ffc5bc512d5c7f6d4c43fff1739b0d84 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Tue, 13 Aug 2024 23:02:34 +0800 Subject: [PATCH] revert UOps eq, this needs to be isolated in realize.py (#6063) This reverts commit dccca7f2273d2029ecf030f3cd854a8c453a5891. --- test/helpers.py | 21 +++++++++- test/test_pattern_matcher.py | 3 +- test/test_uop_graph.py | 79 ++++++++++++++++++------------------ test/test_uops.py | 40 ++---------------- tinygrad/codegen/uops.py | 9 ---- 5 files changed, 66 insertions(+), 86 deletions(-) diff --git a/test/helpers.py b/test/helpers.py index c77affee..f4e49316 100644 --- a/test/helpers.py +++ b/test/helpers.py @@ -1,6 +1,8 @@ -import sys +import sys, unittest +from typing import Optional, Set, Tuple import numpy as np from tinygrad import Tensor, Device, dtypes +from tinygrad.codegen.uops import UOp from tinygrad.tensor import _to_np_dtype from tinygrad.engine.realize import Runner from tinygrad.dtype import DType @@ -51,3 +53,20 @@ def rand_for_dtype(dt:DType, size:int): elif dt == dtypes.bool: return np.random.choice([True, False], size=size) return np.random.uniform(-10, 10, size=size).astype(_to_np_dtype(dt)) + +class TestUOps(unittest.TestCase): + def assert_equiv_uops(self, uop1:UOp, uop2:UOp, cache:Optional[Set[Tuple[UOp, UOp]]]=None): + if cache is None: cache = set() + if (uop1, uop2) in cache: return + cache.add((uop1, uop2)) + # NOTE: direct UOps __eq__ is comparing object reference, use this function to compare two uops + try: + self.assertIs(uop1.op, uop2.op) + self.assertEqual(uop1.dtype, uop2.dtype) + self.assertEqual(uop1.arg, uop2.arg) + self.assertEqual(len(uop1.src), len(uop2.src)) + for s1, s2 in zip(uop1.src, uop2.src): self.assert_equiv_uops(s1, s2) + except AssertionError as e: + print(f"{uop1=}") + print(f"{uop2=}") + raise e diff --git a/test/test_pattern_matcher.py b/test/test_pattern_matcher.py index 78552081..216cf38c 100644 --- a/test/test_pattern_matcher.py +++ b/test/test_pattern_matcher.py @@ -1,10 +1,11 @@ import unittest, itertools +from test.helpers import TestUOps from tinygrad.dtype import dtypes from tinygrad.ops import BinaryOps, TernaryOps, ReduceOps, UnaryOps # noqa: F401 from tinygrad.codegen.uops import UOps, UOp, PatternMatcher, UPat from tinygrad.codegen.uopgraph import constant_folder -class TestPatternMatcher(unittest.TestCase): +class TestPatternMatcher(TestUOps): def test_simple_match(self): matcher = PatternMatcher([(UPat(UOps.CONST, name="x", dtype=dtypes.float), lambda x: x)]) c1 = UOp(UOps.CONST, dtypes.float, arg=1.0) diff --git a/test/test_uop_graph.py b/test/test_uop_graph.py index d41b9dc3..bbd3553f 100644 --- a/test/test_uop_graph.py +++ b/test/test_uop_graph.py @@ -1,4 +1,5 @@ import unittest +from test.helpers import TestUOps from tinygrad import dtypes, Variable from tinygrad.dtype import PtrDType from tinygrad.helpers import DEBUG @@ -89,7 +90,7 @@ class TestGraphRewrite(unittest.TestCase): self.assertEqual(sink.src[1].op, UOps.CONST) self.assertEqual(len([x for x in sink.sparents if x.op is UOps.CONST]), 3) -class TestUOpGraph(unittest.TestCase): +class TestUOpGraph(TestUOps): def test_add_constant_fold(self): c1 = UOp(UOps.CONST, dtypes.float, arg=1.0) c2 = UOp(UOps.CONST, dtypes.float, arg=2.0) @@ -160,7 +161,7 @@ class TestUOpGraph(unittest.TestCase): # possible val = UOp(UOps.LOAD, dtypes.float.vec(4), (d1, idx)) xyzw = tuple(UOp(UOps.GEP, dtypes.float, (val,), i) for i in range(4)) - self.assertEqual(_test_vec(xyzw), val) + self.assert_equiv_uops(_test_vec(xyzw), val) # unaligned val = UOp(UOps.LOAD, dtypes.float.vec(4), (d1, idx)) @@ -189,7 +190,7 @@ class TestUOpGraph(unittest.TestCase): geps = [UOp(UOps.GEP, dtypes.float, (vec,), i) for i in range(vec_size)] g = UOpGraph(geps) for uop, const in zip(g.uops, consts): - self.assertEqual(uop, const) + self.assert_equiv_uops(uop, const) def test_wmma_vectorize_fold(self): for i in [2, 4, 8]: @@ -198,7 +199,7 @@ class TestUOpGraph(unittest.TestCase): acc = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable('acc', 0.0, 1.0)) wmma = UOp(UOps.WMMA, dtypes.half.vec(i), (vec, var, acc)) g = UOpGraph([wmma]) - self.assertEqual(g.uops[0], acc) + self.assert_equiv_uops(g.uops[0], acc) self.assertEqual(len(g.uops), 1) for i in [2, 4, 8]: @@ -207,7 +208,7 @@ class TestUOpGraph(unittest.TestCase): acc = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable('acc', 0.0, 1.0)) wmma = UOp(UOps.WMMA, dtypes.half.vec(i), (var, vec, acc)) g = UOpGraph([wmma]) - self.assertEqual(g.uops[0], acc) + self.assert_equiv_uops(g.uops[0], acc) self.assertEqual(len(g.uops), 1) def test_wmma_vectorize_no_fold(self): @@ -219,7 +220,7 @@ class TestUOpGraph(unittest.TestCase): acc = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable('acc', 0.0, 1.0)) wmma = UOp(UOps.WMMA, dtypes.half.vec(i), (vec, var, acc)) g = UOpGraph([wmma]) - self.assertEqual(g.uops[-1], wmma) + self.assert_equiv_uops(g.uops[-1], wmma) for i in [4, 8]: var = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable(f'tmp{i}', 0.0, 1.0)) @@ -229,7 +230,7 @@ class TestUOpGraph(unittest.TestCase): acc = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable('acc', 0.0, 1.0)) wmma = UOp(UOps.WMMA, dtypes.half.vec(i), (var, vec, acc)) g = UOpGraph([wmma]) - self.assertEqual(g.uops[-1], wmma) + self.assert_equiv_uops(g.uops[-1], wmma) for i in [2, 4, 8]: vec = UOp(UOps.VECTORIZE, dtypes.half.vec(i), @@ -238,7 +239,7 @@ class TestUOpGraph(unittest.TestCase): acc = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable('acc', 0.0, 1.0)) wmma = UOp(UOps.WMMA, dtypes.half.vec(i), (vec, var, acc)) g = UOpGraph([wmma]) - self.assertEqual(g.uops[-1], wmma) + self.assert_equiv_uops(g.uops[-1], wmma) for i in [2, 4, 8]: var = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable(f'tmp{i}', 0.0, 1.0)) @@ -247,7 +248,7 @@ class TestUOpGraph(unittest.TestCase): acc = UOp(UOps.DEFINE_VAR, dtypes.half.vec(i), arg=Variable('acc', 0.0, 1.0)) wmma = UOp(UOps.WMMA, dtypes.half.vec(i), (var, vec, acc)) g = UOpGraph([wmma]) - self.assertEqual(g.uops[-1], wmma) + self.assert_equiv_uops(g.uops[-1], wmma) def test_cast_alu_fold(self): d0 = UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.bool), arg=0) @@ -293,9 +294,9 @@ class TestUOpGraph(unittest.TestCase): uops = UOpGraph([UOp(UOps.STORE, None, (glbl0, idx, ld1+ld0))]) ld0, ld1 = uops[-1].src[2].src # ld0 becomes the invalid value - self.assertEqual(ld1, UOp.const(dtypes.int, 2)) + self.assert_equiv_uops(ld1, UOp.const(dtypes.int, 2)) # the gate and invalid value are deleted from ld1 - self.assertEqual(ld0, UOp.load(glbl2, idx, dtype=dtypes.int)) + self.assert_equiv_uops(ld0, UOp.load(glbl2, idx, dtype=dtypes.int)) def test_fold_gated_load_local(self): glbl0 = UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.int), (), 0) @@ -308,9 +309,9 @@ class TestUOpGraph(unittest.TestCase): uops = UOpGraph([UOp(UOps.STORE, None, (glbl0, lidx, ld1+ld0))]) ld0, ld1 = uops[-1].src[2].src # ld0 becomes the invalid value - self.assertEqual(ld1, UOp.const(dtypes.int, 2)) + self.assert_equiv_uops(ld1, UOp.const(dtypes.int, 2)) # the gate and invalid value are deleted from ld1 - self.assertEqual(ld0, UOp.load(smem, lidx+2, barrier, dtype=dtypes.int)) + self.assert_equiv_uops(ld0, UOp.load(smem, lidx+2, barrier, dtype=dtypes.int)) def test_fold_gated_store(self): glbl = UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.int), (), 0) @@ -322,7 +323,7 @@ class TestUOpGraph(unittest.TestCase): uops = UOpGraph([st0, st1]) # only the second store happens self.assertEqual(len(uops.uops), 4) - self.assertEqual(uops[-1], UOp.store(glbl, idx1, val)) + self.assert_equiv_uops(uops[-1], UOp.store(glbl, idx1, val)) def test_asserts_bad_gate(self): glbl0 = UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.int), (), 0) @@ -559,7 +560,7 @@ class TestLoadStoreFolder(unittest.TestCase): def gate_rewrite(sink): return graph_rewrite(sink, constant_folder + expander + reducer) -class TestIFUOps(unittest.TestCase): +class TestIFUOps(TestUOps): def test_create_ifs(self): gbuf = UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.float), (), 0) sbuf = UOp(UOps.DEFINE_LOCAL, PtrDType(dtypes.float), (), ("smem", 4)) @@ -575,7 +576,7 @@ class TestIFUOps(unittest.TestCase): sink = gate_rewrite(sink) if_uops = [u for u in sink.parents if u.op is UOps.IF] self.assertEqual(len(if_uops), 1) - self.assertEqual(if_uops[0].src[0], gate) + self.assert_equiv_uops(if_uops[0].src[0], gate) for st in sink.src: self.assertEqual(len(st.src), 3) @@ -593,7 +594,7 @@ class TestIFUOps(unittest.TestCase): sink = gate_rewrite(sink) if_uops = [u for u in sink.parents if u.op is UOps.IF] self.assertEqual(len(if_uops), 1) - self.assertEqual(if_uops[0].src[0], gate) + self.assert_equiv_uops(if_uops[0].src[0], gate) for st in sink.src: self.assertEqual(len(st.src), 3) @@ -609,19 +610,19 @@ class TestIFUOps(unittest.TestCase): sink = gate_rewrite(sink) if_uops = [u for u in sink.parents if u.op is UOps.IF] self.assertEqual(len(if_uops), 1) - self.assertEqual(if_uops[0].src[0], gate) + self.assert_equiv_uops(if_uops[0].src[0], gate) for st in sink.src: self.assertEqual(len(st.src), 3) -class TestDivMod(unittest.TestCase): +class TestDivMod(TestUOps): def c(self, c:int): return UOp.const(dtypes.int, c) def x(self, expr:str, nmin:int, nmax:int): return UOp(UOps.DEFINE_VAR, dtypes.int, (self.c(nmin), self.c(nmax)), Variable(expr, nmin, nmax)) # NOTE: does not simplify to the end def test_const_mod(self): - self.assertEqual(mod_folding(self.c(6), 3), self.c(1)*self.c(0)) - self.assertEqual(mod_folding(self.c(7), 3), self.c(1)*self.c(1)) - self.assertEqual(mod_folding(self.c(8), 3), self.c(1)*self.c(2)) + self.assert_equiv_uops(mod_folding(self.c(6), 3), self.c(1)*self.c(0)) + self.assert_equiv_uops(mod_folding(self.c(7), 3), self.c(1)*self.c(1)) + self.assert_equiv_uops(mod_folding(self.c(8), 3), self.c(1)*self.c(2)) def test_var_mod(self): self.assertIsNone(mod_folding(self.x("x", 0, 6), 3)) @@ -629,32 +630,32 @@ class TestDivMod(unittest.TestCase): @unittest.skip("does not simplify to the end") def test_add_mod(self): - self.assertEqual(mod_folding(self.x("x", 0, 6)+40, 5), self.x("x", 0, 6)) - self.assertEqual(mod_folding(self.x("x", 0, 6)-40, 5), self.x("x", 0, 6)) - self.assertEqual(mod_folding(self.x("x", 0, 6)+42, 5), (self.x("x", 0, 6)+2)) - self.assertEqual(mod_folding(self.x("x", 0, 6)-42, 5), (self.x("x", 0, 6)+3)) - self.assertEqual(mod_folding(40+self.x("x", 0, 6), 5), self.x("x", 0, 6)) - self.assertEqual(mod_folding(-40+self.x("x", 0, 6), 5), self.x("x", 0, 6)) - self.assertEqual(mod_folding(42+self.x("x", 0, 6), 5), (2+self.x("x", 0, 6))) - self.assertEqual(mod_folding(-42+self.x("x", 0, 6), 5), (3+self.x("x", 0, 6))) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)+40, 5), self.x("x", 0, 6)) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)-40, 5), self.x("x", 0, 6)) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)+42, 5), (self.x("x", 0, 6)+2)) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)-42, 5), (self.x("x", 0, 6)+3)) + self.assert_equiv_uops(mod_folding(40+self.x("x", 0, 6), 5), self.x("x", 0, 6)) + self.assert_equiv_uops(mod_folding(-40+self.x("x", 0, 6), 5), self.x("x", 0, 6)) + self.assert_equiv_uops(mod_folding(42+self.x("x", 0, 6), 5), (2+self.x("x", 0, 6))) + self.assert_equiv_uops(mod_folding(-42+self.x("x", 0, 6), 5), (3+self.x("x", 0, 6))) @unittest.skip("does not simplify to the end") def test_mul_mod(self): - self.assertEqual(mod_folding(self.x("x", 0, 6)*40, 5), self.c(0)) - self.assertEqual(mod_folding(self.x("x", 0, 6)*-40, 5), self.c(0)) - self.assertEqual(mod_folding(self.x("x", 0, 6)*42, 5), (self.x("x", 0, 6)*2)) - self.assertEqual(mod_folding(self.x("x", 0, 6)*-42, 5), (self.x("x", 0, 6)*3)) - self.assertEqual(mod_folding(40*self.x("x", 0, 6), 5), self.c(0)) - self.assertEqual(mod_folding(-40*self.x("x", 0, 6), 5), self.c(0)) - self.assertEqual(mod_folding(42*self.x("x", 0, 6), 5), (2*self.x("x", 0, 6))) - self.assertEqual(mod_folding(-42*self.x("x", 0, 6), 5), (3*self.x("x", 0, 6))) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)*40, 5), self.c(0)) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)*-40, 5), self.c(0)) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)*42, 5), (self.x("x", 0, 6)*2)) + self.assert_equiv_uops(mod_folding(self.x("x", 0, 6)*-42, 5), (self.x("x", 0, 6)*3)) + self.assert_equiv_uops(mod_folding(40*self.x("x", 0, 6), 5), self.c(0)) + self.assert_equiv_uops(mod_folding(-40*self.x("x", 0, 6), 5), self.c(0)) + self.assert_equiv_uops(mod_folding(42*self.x("x", 0, 6), 5), (2*self.x("x", 0, 6))) + self.assert_equiv_uops(mod_folding(-42*self.x("x", 0, 6), 5), (3*self.x("x", 0, 6))) @unittest.skip("does not simplify to the end now") def test_mul_add_mod(self): x = self.x("x", 0, 10) y = self.x("y", 0, 10) z = self.x("z", 0, 10) - self.assertEqual(mod_folding(x*40+y*12+z, 5), (y*2+z)) + self.assert_equiv_uops(mod_folding(x*40+y*12+z, 5), (y*2+z)) if __name__ == '__main__': diff --git a/test/test_uops.py b/test/test_uops.py index 69873c6a..443f2c50 100644 --- a/test/test_uops.py +++ b/test/test_uops.py @@ -1,5 +1,5 @@ from typing import Optional, Tuple, Any, List -import unittest, math, time +import unittest, math import numpy as np from tinygrad.tensor import Tensor, _to_np_dtype from tinygrad.helpers import CI, DEBUG, getenv, Context @@ -11,7 +11,7 @@ from tinygrad.engine.schedule import create_schedule from tinygrad.engine.realize import CompiledRunner, lower_schedule_item, get_kernel from tinygrad.codegen.uops import UOps, NOp, UOp from tinygrad.codegen.uopgraph import UOpGraph -from test.helpers import is_dtype_supported +from test.helpers import is_dtype_supported, TestUOps as TestEqUOps def _uops_to_prg(uops_list, print_uops=False): uops = UOpGraph(uops_list) @@ -357,39 +357,7 @@ class TestUOpCompare(unittest.TestCase): mul = UOp(UOps.ALU, dtypes.float, (a, b), BinaryOps.MUL) assert (add < mul) or (mul < add), "add and mul with same src should have an order" - def test_uop_eq_fields(self): - a = UOp(UOps.CONST, dtypes.float, (), 2.0) - b = UOp(UOps.CONST, dtypes.float, (), 2.0) - self.assertEqual(a, b) - - def test_uop_ne_fields(self): - a = UOp(UOps.RANGE, dtypes.pyint, (UOp.const(dtypes.pyint, 0), UOp.const(dtypes.pyint, 1)), (1, False)) - b = UOp(UOps.RANGE, dtypes.pyint, (UOp.const(dtypes.pyint, 0), UOp.const(dtypes.pyint, 2)), (1, False)) - self.assertNotEqual(a, b) - - def test_recursive_eq_src(self): - st = time.perf_counter() - buf = UOp(UOps.DEFINE_GLOBAL, PtrDType(dtypes.int), (), 0) - idx = UOp.const(dtypes.int, 0) - a = UOp(UOps.LOAD, dtypes.float, (buf, idx)) - for _ in range(24): a += a - b = UOp(UOps.LOAD, dtypes.float, (buf, idx)) - for _ in range(24): b += b - self.assertEqual(a, b) - self.assertLess(time.perf_counter()-st, 1e-2) - - # NOTE: NOp uses the dataclass compare, this is fine - def test_nop_ne(self): - a = NOp(UOps.CONST, dtypes.float, (), 2.0, name="a") - b = NOp(UOps.CONST, dtypes.float, (), 2.0, name="b") - self.assertNotEqual(a, b) - - def test_nop_eq(self): - a1 = NOp(UOps.CONST, dtypes.float, (), 2.0, name="a") - a2 = NOp(UOps.CONST, dtypes.float, (), 2.0, name="a") - self.assertEqual(a1, a2) - -class TestUOpStr(unittest.TestCase): +class TestUOpStr(TestEqUOps): def test_uop_str(self): a = UOp(UOps.CONST, dtypes.float, (), 2.0) + UOp(UOps.CONST, dtypes.float, (), 3.0) for _ in range(20): a = a + a @@ -401,7 +369,7 @@ class TestUOpStr(unittest.TestCase): # nice big complicated uop with Context(NOOPT=1): sink = get_kernel(Device[Device.DEFAULT].renderer, t.schedule()[-1].ast).linearize().uops.sink - self.assertEqual(sink, eval(str(sink))) + self.assert_equiv_uops(sink, eval(str(sink))) def test_nop_str(self): a = NOp(UOps.CONST, dtypes.float, (), 2.0, name="c0") + NOp(UOps.CONST, dtypes.float, (), 3.0, name="c1") diff --git a/tinygrad/codegen/uops.py b/tinygrad/codegen/uops.py index 3ed1c16e..37761798 100644 --- a/tinygrad/codegen/uops.py +++ b/tinygrad/codegen/uops.py @@ -43,15 +43,6 @@ class UOp: return (self.op.value, (self.arg if self.op is not UOps.DEFINE_VAR else self.arg.expr) if self.op is not UOps.ALU else \ self.arg.value, self.dtype, self.src) def __lt__(self, x:UOp): return self.cmp_tuple < x.cmp_tuple - def cached_eq(self, x:UOp, context:Dict[Tuple[int, int], bool]) -> bool: - if id(self) == id(x): return True - if self.op != x.op or self.dtype != x.dtype or self.arg != x.arg or len(self.src) != len(x.src): return False - if (key := (id(self), id(x))) in context: return context[key] - return context.setdefault(key, all(a.cached_eq(b, context) for a,b in zip(self.src, x.src))) - def __eq__(self, x): return self.cached_eq(x, context={}) - @functools.cached_property - def hash(self): return hash((self.op, self.dtype, self.src, self.arg)) - def __hash__(self): return self.hash def __repr__(self): return pretty_print(self, lambda x: f"{type(self).__name__}({x.op}, {x.dtype}, arg={x.arg}, src=(%s))") # *** uop syntactic sugar def ufix(self, x): return self.const(x) if not isinstance(x, UOp) else x