Revert "ops_ext to replace cpu import (#3406)" (#3408)

This reverts commit 91eb93f85a.
2024-02-15 12:16:10 +01:00 · 2024-02-15 12:16:10 +01:00 · 6356474d6d
parent 91eb93f85a
commit 6356474d6d
9 changed files with 11 additions and 29 deletions
--- a/docs/abstractions.py
+++ b/docs/abstractions.py
@ -135,8 +135,8 @@ assert len(lazyop.srcs) == 2
 # the source is a LazyBuffer that is a "CPU" Tensor
 # again, a LazyOp AST is like a GPU kernel. you have to copy the data on the device first
 assert lazyop.srcs[0].op == LoadOps.COPY
-assert lazyop.srcs[0].srcs[0].device == "EXT"
-assert lazyop.srcs[0].srcs[0].realized._buf[0][0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2]"
+assert lazyop.srcs[0].srcs[0].device == "CPU"
+assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2]"
 assert result.lazydata.base.realized is None, "the LazyBuffer is not realized yet"

 # now we realize the LazyBuffer
--- a/test/external/external_test_example.py
+++ b/test/external/external_test_example.py
@ -7,7 +7,7 @@ def multidevice_test(fxn):
  exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
  def ret(self):
    for device in Device._devices:
-      if device in ["DISK", "EXT", "FAKE"]: continue
+      if device in ["DISK", "FAKE"]: continue
      if not CI: print(device)
      if device in exclude_devices:
        if not CI: print(f"WARNING: {device} test is excluded")
--- a/test/test_schedule.py
+++ b/test/test_schedule.py
@ -350,7 +350,7 @@ class TestSchedule(unittest.TestCase):

  def test_double_from(self):
    x = Tensor([1,2,3,4])
-    out = x.to('ext')
+    out = x.to('cpu')
    check_schedule(out, 0, filter_loadops=False)

  def test_pow_const_tensor_simplified(self):
--- a/tinygrad/codegen/kernel.py
+++ b/tinygrad/codegen/kernel.py
@ -68,7 +68,7 @@ class LinearizerOptions(NamedTuple):

 class Kernel:
  def __init__(self, ast:LazyOp, opts:Optional[LinearizerOptions]=None):
-    self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) and device.compiler is not None else
+    self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) else
                         LinearizerOptions(Device.DEFAULT))
    self.ast = ast
    assert ast.op == BufferOps.STORE, f"kernels must have a store as the output, got {ast.op}"
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@ -281,7 +281,6 @@ class CompiledASTRunner(JITRunner):
    if local_size is not None: local_size = local_size + [1]*(3-len(local_size))
    self.name, self.display_name, self.prg, self.device, self.global_size, self.local_size, self.first_run = \
      to_function_name(name), name, prg, device, global_size, local_size, True
-    assert self.device.compiler is not None, "compiler is reuired to make an AST kernel"
    lib:bytes = precompiled if precompiled is not None else self.device.compiler.compile_cached(prg)
    self.lib, self.clprg = lib, self.device.runtime(self.name, lib)
    self.vars: List[Variable] = []
@ -313,17 +312,15 @@ class CompiledASTRunner(JITRunner):
    return et

 class Compiled:
-  def __init__(self, device:str, allocator:Allocator, compiler:Optional[Compiler], runtime, graph=None):
+  def __init__(self, device:str, allocator:Allocator, compiler:Compiler, runtime, graph=None):
    self.dname, self.allocator, self.compiler, self.runtime, self.graph = device, allocator, compiler, runtime, graph
  def synchronize(self): pass  # override this in your device

  def to_program(self, k:Linearizer) -> CompiledASTRunner:
-    assert self.compiler is not None, "compiler is required to run AST"
    k.linearize()
    return CompiledASTRunner(k.ast, k.name, self.compiler.render(to_function_name(k.name), k.uops), self, k.global_size, k.local_size)

  def get_linearizer(self, ast:LazyOp) -> Linearizer:
-    assert self.compiler is not None, "compiler is required to build AST"
    if DEBUG >= 3:
      from tinygrad.features.graph import print_tree
      print_tree(ast)
--- a/tinygrad/features/search.py
+++ b/tinygrad/features/search.py
@ -153,7 +153,7 @@ def time_linearizer(lin:Linearizer, rawbufs:List[Buffer], allow_test_size=True,
  if not disable_cache and CACHELEVEL >= 2 and (val:=diskcache_get("time_linearizer", key)) is not None: return min(val)

  dev = Device[lin.opts.device]
-  assert isinstance(dev, Compiled) and dev.compiler is not None
+  assert isinstance(dev, Compiled)

  var_vals = {k:(k.max+k.min)//2 for k in lin.ast.vars()}
  lib, global_size, local_size = _compile_linearizer(dev.compiler, lin)
--- a/tinygrad/runtime/ops_ext.py
+++ b/tinygrad/runtime/ops_ext.py
@ -1,12 +0,0 @@
-from typing import Tuple, Any
-from tinygrad.device import Compiled, Allocator
-
-# the Any is an arbitrary object that's kept in scope with the memoryview
-class ExtAllocator(Allocator):
-  # NOTE: this doesn't work with allow_zero_copy, it's read only somehow
-  #def as_buffer(self, src:Tuple[memoryview, Any]) -> memoryview: return src[0]
-  def copyin(self, dest:Tuple[memoryview, Any], src:memoryview): dest[0][:] = src
-  def copyout(self, dest:memoryview, src:Tuple[memoryview, Any]): dest[:] = src[0]
-
-class ExtDevice(Compiled):
-  def __init__(self, device:str): super().__init__(device, ExtAllocator(), None, None)
--- a/tinygrad/runtime/ops_python.py
+++ b/tinygrad/runtime/ops_python.py
@ -93,7 +93,7 @@ class PythonProgram:
          ul[i] = [pbufs.pop(0).cast(dtype.fmt)] * warp_size
        elif uop is UOps.DEFINE_LOCAL:
          assert dtype.fmt is not None
-          lbuf = memoryview(bytearray(arg[1]*dtype.itemsize))
+          lbuf = memoryview(bytearray(arg[1]*dtype.sz))
          ul[i] = [lbuf.cast(dtype.fmt)] * warp_size
        elif uop is UOps.SPECIAL:
          if arg[1][0] == 'g':
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@ -7,7 +7,7 @@ from functools import partialmethod, reduce
 import numpy as np

 from tinygrad.dtype import DType, dtypes, ImageDType, Scalar, least_upper_float, least_upper_dtype
-from tinygrad.helpers import argfix, make_pair, getenv, IMAGE, DEBUG, WINO, flatten, prod, all_int, round_up, merge_dicts, fully_flatten, flat_mv
+from tinygrad.helpers import argfix, make_pair, getenv, IMAGE, DEBUG, WINO, flatten, prod, all_int, round_up, merge_dicts, fully_flatten
 from tinygrad.lazy import LazyBuffer
 from tinygrad.features.multi import MultiLazyBuffer
 from tinygrad.ops import LoadOps
@ -42,11 +42,8 @@ def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str,
  return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None)

 def _fromcpu(x: np.ndarray) -> LazyBuffer:
-  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "EXT")
-  if x.size == 0:
-    ret.realized = Buffer("EXT", 0, dtypes.from_np(x.dtype), (memoryview(bytearray()), None))
-  else:
-    ret.realized = Buffer("EXT", prod(x.shape), dtypes.from_np(x.dtype), (flat_mv(np.require(x, requirements='C').data), x))
+  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "CPU")
+  ret.realized = Buffer("CPU", prod(x.shape), dtypes.from_np(x.dtype), x.flatten())
  return ret

 def _get_winograd_matcols(mat, dims:int, shp:Tuple[sint, ...], device:Union[str, Tuple[str, ...]]) -> List[List[Tensor]]: