From ae849d12d791501c39a7e9dbd68393a808229325 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 9 Apr 2024 13:19:30 -0700
Subject: [PATCH] numpy device + pickle it (#4120)

---
 test/external/external_test_example.py |  2 +-
 test/test_pickle.py                    |  6 ++++++
 test/test_schedule.py                  |  2 +-
 tinygrad/buffer.py                     |  1 +
 tinygrad/runtime/ops_ext.py            | 12 ------------
 tinygrad/runtime/ops_npy.py            |  9 +++++++++
 tinygrad/tensor.py                     |  9 ++++-----
 7 files changed, 22 insertions(+), 19 deletions(-)
 delete mode 100644 tinygrad/runtime/ops_ext.py
 create mode 100644 tinygrad/runtime/ops_npy.py

diff --git a/test/external/external_test_example.py b/test/external/external_test_example.py
index 78c7402b..2e83f301 100644
--- a/test/external/external_test_example.py
+++ b/test/external/external_test_example.py
@@ -7,7 +7,7 @@ def multidevice_test(fxn):
   exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
   def ret(self):
     for device in Device._devices:
-      if device in ["DISK", "EXT", "FAKE"]: continue
+      if device in ["DISK", "NPY", "FAKE"]: continue
       if not CI: print(device)
       if device in exclude_devices:
         if not CI: print(f"WARNING: {device} test is excluded")
diff --git a/test/test_pickle.py b/test/test_pickle.py
index f5ea145f..8e72a18b 100644
--- a/test/test_pickle.py
+++ b/test/test_pickle.py
@@ -15,6 +15,12 @@ class TestPickle(unittest.TestCase):
     t2:Tensor = pickle.loads(st)
     np.testing.assert_equal(t.numpy(), t2.numpy())
 
+  def test_pickle_numpy(self):
+    t = Tensor(np.array([1,2,3,4.]))
+    st = pickle.dumps(t)
+    t2:Tensor = pickle.loads(st)
+    np.testing.assert_equal(t.numpy(), t2.numpy())
+
   @unittest.expectedFailure
   def test_pickle_jit(self):
     @TinyJit
diff --git a/test/test_schedule.py b/test/test_schedule.py
index bdc002df..e3177b1b 100644
--- a/test/test_schedule.py
+++ b/test/test_schedule.py
@@ -350,7 +350,7 @@ class TestSchedule(unittest.TestCase):
 
   def test_double_from(self):
     x = Tensor([1,2,3,4])
-    out = x.to('ext')
+    out = x.to('npy')
     check_schedule(out, 0, filter_loadops=False)
 
   def test_pow_const_tensor_simplified(self):
diff --git a/tinygrad/buffer.py b/tinygrad/buffer.py
index 3677cc5b..eb17846d 100644
--- a/tinygrad/buffer.py
+++ b/tinygrad/buffer.py
@@ -29,6 +29,7 @@ class Buffer:
     return self
   def __reduce__(self):
     buf = None
+    if self.device == "NPY": return self.__class__, (self.device, self.size, self.dtype, self._buf, self.options)
     if hasattr(self, '_buf'):
       buf = bytearray(self.nbytes)
       self.copyout(memoryview(buf))
diff --git a/tinygrad/runtime/ops_ext.py b/tinygrad/runtime/ops_ext.py
deleted file mode 100644
index f4c8fc1f..00000000
--- a/tinygrad/runtime/ops_ext.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from typing import Tuple, Any
-from tinygrad.device import Compiled, Allocator
-
-# the Any is an arbitrary object that's kept in scope with the memoryview
-class ExtAllocator(Allocator):
-  # NOTE: this doesn't work with allow_zero_copy, it's read only somehow
-  #def as_buffer(self, src:Tuple[memoryview, Any]) -> memoryview: return src[0]
-  def copyin(self, dest:Tuple[memoryview, Any], src:memoryview): dest[0][:] = src
-  def copyout(self, dest:memoryview, src:Tuple[memoryview, Any]): dest[:] = src[0]
-
-class ExtDevice(Compiled):
-  def __init__(self, device:str): super().__init__(device, ExtAllocator(), None, None)
diff --git a/tinygrad/runtime/ops_npy.py b/tinygrad/runtime/ops_npy.py
new file mode 100644
index 00000000..c8121b9a
--- /dev/null
+++ b/tinygrad/runtime/ops_npy.py
@@ -0,0 +1,9 @@
+import numpy as np
+from tinygrad.helpers import flat_mv
+from tinygrad.device import Compiled, Allocator
+
+class NpyAllocator(Allocator):
+  def copyout(self, dest:memoryview, src:np.ndarray): dest[:] = flat_mv(np.require(src, requirements='C').data)
+
+class NpyDevice(Compiled):
+  def __init__(self, device:str): super().__init__(device, NpyAllocator(), None, None)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index d29bfd7c..a17503ef 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -7,8 +7,7 @@ from collections import defaultdict
 import numpy as np
 
 from tinygrad.dtype import DType, dtypes, ImageDType, ConstType, least_upper_float, least_upper_dtype
-from tinygrad.helpers import argfix, make_pair, flatten, prod, all_int, round_up, merge_dicts, fully_flatten, flat_mv, argsort
-from tinygrad.helpers import IMAGE, DEBUG, WINO, THREEFRY
+from tinygrad.helpers import argfix, make_pair, flatten, prod, all_int, round_up, merge_dicts, fully_flatten, argsort, IMAGE, DEBUG, WINO, THREEFRY
 from tinygrad.lazy import LazyBuffer
 from tinygrad.features.multi import MultiLazyBuffer
 from tinygrad.ops import LoadOps
@@ -45,9 +44,9 @@ def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str,
   return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None)
 
 def _fromcpu(x: np.ndarray) -> LazyBuffer:
-  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "EXT")
+  ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "NPY")
   # fake realize
-  ret.buffer.allocate((memoryview(bytearray()), None) if x.size == 0 else (flat_mv(np.require(x, requirements='C').data), x))
+  ret.buffer.allocate(x)
   del ret.srcs
   return ret
 
@@ -155,7 +154,7 @@ class Tensor:
   def assign(self, x) -> Tensor:
     # TODO: this is a hack for writing to DISK. remove with working assign
     if isinstance(self.device, str) and self.device.startswith("DISK"):
-      if x.__class__ is not Tensor: x = Tensor(x, device="EXT", dtype=self.dtype)
+      if x.__class__ is not Tensor: x = Tensor(x, device="NPY", dtype=self.dtype)
       self.contiguous().realize().lazydata.base.realized.copyin(x.numpy().data)
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)