From ba8602612bff7eabeff77b416f84f45f0d3930a4 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Wed, 17 Apr 2024 04:03:21 +0300 Subject: [PATCH] Fuzz all permutations of schedule (#4136) * simple toposort * fuzzer * init in_degree * move to tests * same seed * configure paths * internal graph * compare LazyBuffers * simpler * simple graph * assign works * simpler * fix JIT * upstream ci * move ci * fix the path * DEBUG=1 * limit max paths * launch a cmp kernel * Revert "launch a cmp kernel" This reverts commit 791c6089922fa7d800456f28fc167842f188ac7e. * exec ground truth * better perf * copy ground truth once * gpu allclose ast try1 * Revert "gpu allclose ast try1" This reverts commit 1f82103af3a7bfedb9f858b6c58b0b94f1c7e6b0. * prerealized bufs freezing * teeny cleanups * reuse Buffers * Revert "reuse Buffers" This reverts commit a71de94b035bd5ceb1ec257f6b2529b166bcd30b. --------- Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com> --- .github/workflows/test.yml | 2 + test/external/fuzz_schedule.py | 84 ++++++++++++++++++++++++++++++++++ tinygrad/buffer.py | 2 +- tinygrad/engine/schedule.py | 11 +++-- tinygrad/tensor.py | 4 ++ 5 files changed, 99 insertions(+), 4 deletions(-) create mode 100644 test/external/fuzz_schedule.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index decbc85a..9aaf0895 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -317,6 +317,8 @@ jobs: run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py - name: Fuzz Test linearizer run: PYTHONPATH="." METAL=1 CACHELEVEL=0 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=48 FUZZ_MAX_SIZE=10000000 python test/external/fuzz_linearizer.py + - name: Fuzz Test models schedule + run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py # testwebgl: diff --git a/test/external/fuzz_schedule.py b/test/external/fuzz_schedule.py new file mode 100644 index 00000000..304a3fba --- /dev/null +++ b/test/external/fuzz_schedule.py @@ -0,0 +1,84 @@ +import numpy as np +from typing import DefaultDict, Dict, List, Set, TypeVar +from tinygrad.buffer import Buffer +from tinygrad.engine.realize import CustomOp, ExecItem, capturing, lower_schedule_item +from tinygrad.helpers import DEBUG, colored, getenv +from tinygrad.lazy import LazyBuffer +from tinygrad.engine.schedule import _graph_schedule +from tinygrad.ops import LoadOps, ScheduleItem +from tinygrad.tensor import Tensor + +def fuzz_schedule(outs: List[LazyBuffer]): + graph, in_degree, prescheduled = _graph_schedule(outs, seen:=set()) + toposorts = find_all_toposorts(graph, in_degree) + if DEBUG >= 1: print(colored(f"fuzzing {len(toposorts)} schedule permutations", "yellow")) + + # setup ground truth + ground_truth: Dict[LazyBuffer, memoryview] = {} + # IMPORTANT: freeze prerealized bufs before ScheduleItem exec + prerealized: Dict[LazyBuffer, memoryview] = {} + seed = Tensor._seed + for key in toposorts[0]: + for out in (ps:=prescheduled[key]).outputs: + seen.add(out) + # freeze assign state before exec + if out.op is LoadOps.ASSIGN: prerealized[out] = out.buffer.as_buffer() + for x in ps.inputs: + if x not in ground_truth and x.device != "NPY": prerealized[x] = x.buffer.as_buffer() + si = ScheduleItem(ps.ast, tuple(x.buffer for x in ps.outputs if x.size != 0), tuple(x.buffer for x in ps.inputs if x.size != 0)) + _exec_si(si, seed) + for out in ps.outputs: ground_truth[out] = out.buffer.as_buffer() 
+ + # exec and validate each permutation with new Buffers + for i, ts in enumerate(toposorts[1:]): + if DEBUG >= 1: print(colored(f"testing permutation {i}", "yellow")) + rawbufs: Dict[LazyBuffer, Buffer] = {} + for key in ts: + for out in (ps:=prescheduled[key]).outputs: + rawbufs[out] = Buffer(out.buffer.device, out.buffer.size, out.buffer.dtype) + if out.op is LoadOps.ASSIGN: rawbufs[out].ensure_allocated().copyin(prerealized[out]) + for x in ps.inputs: + if x not in rawbufs: + if x.device == "NPY": rawbufs[x] = x.buffer + # copy the pre realized input + else: rawbufs[x] = Buffer(x.buffer.device, x.buffer.size, x.buffer.dtype, initial_value=prerealized[x]) + si = ScheduleItem(ps.ast, tuple(rawbufs[x] for x in ps.outputs if x.size != 0), tuple(rawbufs[x] for x in ps.inputs if x.size != 0)) + _exec_si(si, seed) + for out in ps.outputs: + outbuf = np.frombuffer(rawbufs[out].as_buffer(), out.dtype.np) + try: np.testing.assert_allclose(outbuf, np.frombuffer(ground_truth[out], out.dtype.np), atol=1e-2, rtol=1e-2) + except Exception as e: + print(f"FAILED FOR {out}") + raise e + +def _exec_si(si: ScheduleItem, seed:int): + ei = ExecItem(lower_schedule_item(si), list(si.outputs+si.inputs)) + if len(capturing): capturing[0].add(ei) + if isinstance(ei.prg, CustomOp): Tensor._seed = seed + ei.run() + +T = TypeVar("T") +def find_all_toposorts(graph:DefaultDict[T, List[T]], in_degree:DefaultDict[T, int]) -> List[List[T]]: + visited: Set[T] = set() + ret: List[List[T]] = [] + path: List[T] = [] + + def recurse_paths(path:List[T]): + for v, d in in_degree.items(): + if d != 0 or v in visited: continue + for u in graph[v]: in_degree[u] -= 1 + path.append(v) + visited.add(v) + recurse_paths(path) + if len(ret) >= getenv("FUZZ_SCHEDULE_MAX_PATHS", 10): return + # backtrack + for u in graph[v]: in_degree[u] += 1 + path.pop() + visited.remove(v) + if len(path) == len(in_degree): ret.append([*path]) + recurse_paths(path) + + if len(ret) == 0: raise RuntimeError("detected cycle in the graph") + # verify all paths are unique + assert len(ret) == len(set(map(tuple, ret))) + return ret diff --git a/tinygrad/buffer.py b/tinygrad/buffer.py index 882df8f2..e4daac74 100644 --- a/tinygrad/buffer.py +++ b/tinygrad/buffer.py @@ -58,4 +58,4 @@ class Buffer: mv = flat_mv(mv) assert len(mv) == self.nbytes, f"size mismatch, {len(mv)=} != {self.dtype=} {self.size=}" self.allocator.copyout(mv, self._buf) - return mv \ No newline at end of file + return mv diff --git a/tinygrad/engine/schedule.py b/tinygrad/engine/schedule.py index 56345d20..087ca3d5 100644 --- a/tinygrad/engine/schedule.py +++ b/tinygrad/engine/schedule.py @@ -128,9 +128,8 @@ def _is_padding_okay(buf:LazyBuffer, realizes:Dict[LazyBuffer, None]) -> bool: if buf.op in UNSAFE_PAD_OPS: return False return all(_is_padding_okay(x.base, realizes) for x in buf.srcs) -def create_schedule_with_vars(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffer]]=None) -> Tuple[List[ScheduleItem], Dict[Variable, int]]: - if seen is None: seen = set() - +def _graph_schedule(outs:List[LazyBuffer], seen:Set[LazyBuffer]) -> Tuple[DefaultDict[LazyBuffer, List[LazyBuffer]], DefaultDict[LazyBuffer, int], + Dict[LazyBuffer, _LBScheduleItem]]: # start by just realizing the buffers passed in realizes: Dict[LazyBuffer, None] = {x.base: None for x in outs if not x.base.realized} allbufs: Dict[LazyBuffer, None] = {} @@ -209,6 +208,7 @@ def create_schedule_with_vars(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffe graph: DefaultDict[LazyBuffer, List[LazyBuffer]] = defaultdict(list) 
in_degree: DefaultDict[LazyBuffer, int] = defaultdict(int) for key, lsi in prescheduled.items(): + if key not in in_degree: in_degree[key] = 0 # realize outputs after all parents are realized scheduled_parents = set(schedule_targets[x].outputs[0] for x in lsi.inputs if x in schedule_targets) for x in scheduled_parents: @@ -221,6 +221,11 @@ def create_schedule_with_vars(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffe in_degree[assign] += 1 for out in lsi.outputs: del out.srcs # can only schedule once + return graph, in_degree, prescheduled + +def create_schedule_with_vars(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffer]]=None) -> Tuple[List[ScheduleItem], Dict[Variable, int]]: + if seen is None: seen = set() + graph, in_degree, prescheduled = _graph_schedule(outs, seen) queue = deque(si for key, si in prescheduled.items() if in_degree[key] == 0) schedule: List[ScheduleItem] = [] var_vals: Dict[Variable, int] = {} diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index d690b123..03c700a3 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -8,6 +8,7 @@ import numpy as np from tinygrad.dtype import DType, dtypes, ImageDType, ConstType, least_upper_float, least_upper_dtype from tinygrad.helpers import argfix, make_pair, flatten, prod, all_int, round_up, merge_dicts, fully_flatten, argsort, IMAGE, DEBUG, WINO, THREEFRY +from tinygrad.helpers import getenv from tinygrad.lazy import LazyBuffer from tinygrad.features.multi import MultiLazyBuffer from tinygrad.ops import LoadOps @@ -141,6 +142,9 @@ class Tensor: @staticmethod def corealize(lst:Iterable[Tensor]): + if getenv("FUZZ_SCHEDULE"): + from test.external.fuzz_schedule import fuzz_schedule + fuzz_schedule(flatten([x.lazydata.lbs for x in lst])) run_schedule(*create_schedule_with_vars(flatten([x.lazydata.lbs for x in lst]))) def realize(self) -> Tensor:
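
The core of the patch is `find_all_toposorts`: a backtracking enumeration of every valid topological ordering of the schedule graph, capped by FUZZ_SCHEDULE_MAX_PATHS, so each permutation can be executed and compared against the ground truth. Below is a minimal standalone sketch of that same backtracking idea on a toy diamond DAG; it does not import tinygrad, and the names `all_toposorts`, `toy_graph`, `toy_in_degree` and `max_paths` are made up for illustration only.

    from collections import defaultdict
    from typing import DefaultDict, List, Set, TypeVar

    T = TypeVar("T")

    def all_toposorts(graph:DefaultDict[T, List[T]], in_degree:DefaultDict[T, int], max_paths:int=10) -> List[List[T]]:
      visited: Set[T] = set()
      ret: List[List[T]] = []

      def recurse(path:List[T]):
        if len(ret) >= max_paths: return
        # a full-length path is one complete topological ordering
        if len(path) == len(in_degree): ret.append([*path]); return
        for v, d in in_degree.items():
          if d != 0 or v in visited: continue
          # pick v next: lower the in-degree of its children, recurse, then undo (backtrack)
          for u in graph[v]: in_degree[u] -= 1
          visited.add(v)
          recurse(path + [v])
          for u in graph[v]: in_degree[u] += 1
          visited.remove(v)

      recurse([])
      return ret

    # toy diamond DAG: a -> b, a -> c, b -> d, c -> d
    toy_graph: DefaultDict[str, List[str]] = defaultdict(list, {"a": ["b", "c"], "b": ["d"], "c": ["d"]})
    toy_in_degree: DefaultDict[str, int] = defaultdict(int, {"a": 0, "b": 1, "c": 1, "d": 2})
    print(all_toposorts(toy_graph, toy_in_degree))  # [['a', 'b', 'c', 'd'], ['a', 'c', 'b', 'd']]

With the patch applied, the fuzzer is opted into through the environment, as in the CI step added above: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py.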