Add needed contiguous to DiskBuffer. SHM support on OSX (#1891)

* add some contiguous

* remove second contig

* Revert "remove second contig"

This reverts commit fc164f7dca1ad75b1e466e4e45a05eca58b7e0e0.

* shm on osx

* can repro bug

* don't contig zeros and ones
Author: George Hotz
Date: 2023-09-22 09:16:42 +08:00 (committed by GitHub)
Parent: d0e752003d
Commit: 78576915de
4 changed files with 15 additions and 11 deletions

View File

@@ -26,7 +26,7 @@ def run():
   # create a tensor to send
   t = Tensor.zeros(SIZE, SIZE) if rank == 0 else Tensor.ones(SIZE, SIZE)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test")
-  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy())
+  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   # reset jit
   allreduce_jit.cnt = 0
@@ -36,7 +36,7 @@ def run():
   # create a tensor to send
   t = Tensor.ones(SIZE_2, SIZE_2, SIZE_2) if rank == 0 else Tensor.zeros(SIZE_2, SIZE_2, SIZE_2)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test2")
-  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy())
+  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   print(f"rank {rank} passed")
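The only change in this file is the assert message: when a rank computes a wrong result, the offending values are printed instead of a bare AssertionError, which is what makes multi-process failures diagnosable from per-rank logs. A minimal sketch of the same pattern, assuming only numpy (the expect_allclose helper is hypothetical, not part of the test):

import numpy as np

def expect_allclose(expected, got, what="result"):
  # include the offending values in the failure so per-rank logs are self-explanatory
  assert np.allclose(expected, got), f"{what}: {got} wasn't {expected}"

expect_allclose(np.ones((2, 2)), np.ones((2, 2)), what="allreduce output")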

View File

@@ -1,11 +1,10 @@
 import unittest
 import multiprocessing.shared_memory as shared_memory
-from tinygrad.helpers import CI, OSX
+from tinygrad.helpers import CI
 from tinygrad.runtime.ops_shm import RawShmBuffer
 from tinygrad.tensor import Tensor, Device
 import numpy as np
 
-@unittest.skipIf(OSX, "no shm on OSX")
 class TestRawShmBuffer(unittest.TestCase):
   def test_e2e(self):
     t = Tensor.randn(2, 2, 2).realize()
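With an OSX code path in RawShmBuffer (next file), the blanket skipIf(OSX, ...) and the OSX import are no longer needed. The test imports Python's multiprocessing.shared_memory, the standard-library way to create a named shared-memory block that a shm-backed buffer can attach to. A stdlib-only sketch of creating, writing, and cleaning up such a block (the sizes and names are illustrative, not what the test uses):

import multiprocessing.shared_memory as shared_memory
import numpy as np

# create a named shared-memory block large enough for 2x2x2 float32 values
shm = shared_memory.SharedMemory(create=True, size=2 * 2 * 2 * 4)
a = np.ndarray((2, 2, 2), dtype=np.float32, buffer=shm.buf)  # view the block as an array
a[:] = np.random.randn(2, 2, 2).astype(np.float32)           # write through the view
print(shm.name)    # another process can attach with shared_memory.SharedMemory(name=shm.name)
del a              # drop the view so the mapping can be closed
shm.close()
shm.unlink()       # remove the block once no process needs it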

View File: tinygrad/runtime/ops_shm.py

@@ -2,7 +2,7 @@ import os, mmap
 try: import _posixshmem # type: ignore
 except Exception: pass
 from typing import Callable, Dict
-from tinygrad.helpers import DType
+from tinygrad.helpers import DType, OSX
 from tinygrad.runtime.lib import RawBufferMapped
 from tinygrad.ops import Interpreted, Op, UnaryOps, MovementOps
 
@@ -13,11 +13,16 @@ class RawShmBuffer(RawBufferMapped):
 
     if self.cache_id is not None and self.cache_id in SHM_CACHE: shm = SHM_CACHE[self.cache_id]
     else:
-      fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
-      # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
-      shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
-      shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
-      os.close(fd)
+      if OSX:
+        with open(f"/tmp/shm_{device}", "w+b") as f:
+          f.truncate(size * dtype.itemsize)
+          shm = mmap.mmap(f.fileno(), size * dtype.itemsize, flags=mmap.MAP_SHARED)
+      else:
+        fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
+        # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
+        shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
+        shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
+        os.close(fd)
 
     if self.cache_id is not None: SHM_CACHE[self.cache_id] = shm
     super().__init__(size, dtype, shm)
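This hunk is the core of the OSX support. On Linux the buffer still comes from POSIX shared memory: _posixshmem.shm_open plus an mmap whose hard-coded extra flags (0x2000 and 0x008000) appear to correspond to Linux's MAP_LOCKED and MAP_POPULATE, which the mmap module doesn't expose by name, followed by an madvise for huge pages. Those flags and MADV_HUGEPAGE don't exist on macOS, so the new branch sidesteps shm_open entirely and mmaps an ordinary file under /tmp with MAP_SHARED, truncated to the requested size. A self-contained sketch of the same two-path idea using only the standard library (the function name, the /dev/shm fallback, and the sizes are illustrative, not tinygrad's API):

import mmap, os, platform

def open_shared_buffer(name: str, nbytes: int) -> mmap.mmap:
  # sketch of the two-path mapping above; the /tmp naming mirrors the diff, the rest is illustrative
  if platform.system() == "Darwin":
    # macOS: back the mapping with a plain file truncated to the requested size
    with open(f"/tmp/shm_{name}", "w+b") as f:
      f.truncate(nbytes)
      return mmap.mmap(f.fileno(), nbytes, flags=mmap.MAP_SHARED)
  # Linux: a file under /dev/shm lives in tmpfs, the same backing store shm_open uses
  fd = os.open(f"/dev/shm/{name}", os.O_RDWR | os.O_CREAT, 0o600)
  try:
    os.ftruncate(fd, nbytes)
    return mmap.mmap(fd, nbytes, flags=mmap.MAP_SHARED)
  finally:
    os.close(fd)

buf = open_shared_buffer("demo", 4096)
buf[:5] = b"hello"   # any process mapping the same name sees this write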

View File: tinygrad/tensor.py

@@ -91,7 +91,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.realize().realized._copyin(x.numpy()) # type: ignore
+      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
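The DISK assign path pushes x's raw numpy bytes into the disk tensor's realized buffer, so the destination must be an actual materialized buffer rather than a lazy view; that is what the added .contiguous() before .realize() guarantees (the "needed contiguous to DiskBuffer" of the title). A loose numpy analogy of why a flat byte copy wants a contiguous array on both ends (an illustration, not tinygrad's internals):

import numpy as np

src = np.arange(16, dtype=np.float32).reshape(4, 4)
view = src.T                            # same data, non-contiguous strides
print(view.flags["C_CONTIGUOUS"])       # False

# a flat byte-for-byte copy only preserves the intended layout once the array is
# materialized contiguously; ascontiguousarray is numpy's analogue of .contiguous()
raw = np.ascontiguousarray(view).tobytes()
roundtrip = np.frombuffer(raw, dtype=np.float32).reshape(4, 4)
assert (roundtrip == view).all()        # the transposed values survive the raw copy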