From 78576915deaf9edd4a2572879429fcb231052f35 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Fri, 22 Sep 2023 09:16:42 +0800
Subject: [PATCH] Add needed contiguous to DiskBuffer. SHM support on OSX (#1891)

* add some contiguous

* remove second contig

* Revert "remove second contig"

This reverts commit fc164f7dca1ad75b1e466e4e45a05eca58b7e0e0.

* shm on osx

* can repro bug

* don't contig zeros and ones
---
 test/external/dist/test_collectives.py |  4 ++--
 test/unit/test_shm_tensor.py           |  3 +--
 tinygrad/runtime/ops_shm.py            | 17 +++++++++++------
 tinygrad/tensor.py                     |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/test/external/dist/test_collectives.py b/test/external/dist/test_collectives.py
index 4d482950..2378a988 100644
--- a/test/external/dist/test_collectives.py
+++ b/test/external/dist/test_collectives.py
@@ -26,7 +26,7 @@ def run():
   # create a tensor to send
   t = Tensor.zeros(SIZE, SIZE) if rank == 0 else Tensor.ones(SIZE, SIZE)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test")
-  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy())
+  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   # reset jit
   allreduce_jit.cnt = 0
@@ -36,7 +36,7 @@ def run():
   # create a tensor to send
   t = Tensor.ones(SIZE_2, SIZE_2, SIZE_2) if rank == 0 else Tensor.zeros(SIZE_2, SIZE_2, SIZE_2)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test2")
-  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy())
+  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   print(f"rank {rank} passed")
 
diff --git a/test/unit/test_shm_tensor.py b/test/unit/test_shm_tensor.py
index d8954b3e..77080660 100644
--- a/test/unit/test_shm_tensor.py
+++ b/test/unit/test_shm_tensor.py
@@ -1,11 +1,10 @@
 import unittest
 import multiprocessing.shared_memory as shared_memory
-from tinygrad.helpers import CI, OSX
+from tinygrad.helpers import CI
 from tinygrad.runtime.ops_shm import RawShmBuffer
 from tinygrad.tensor import Tensor, Device
 import numpy as np
 
-@unittest.skipIf(OSX, "no shm on OSX")
 class TestRawShmBuffer(unittest.TestCase):
   def test_e2e(self):
     t = Tensor.randn(2, 2, 2).realize()
diff --git a/tinygrad/runtime/ops_shm.py b/tinygrad/runtime/ops_shm.py
index dc38de92..79615dc0 100644
--- a/tinygrad/runtime/ops_shm.py
+++ b/tinygrad/runtime/ops_shm.py
@@ -2,7 +2,7 @@ import os, mmap
 try: import _posixshmem # type: ignore
 except Exception: pass
 from typing import Callable, Dict
-from tinygrad.helpers import DType
+from tinygrad.helpers import DType, OSX
 from tinygrad.runtime.lib import RawBufferMapped
 from tinygrad.ops import Interpreted, Op, UnaryOps, MovementOps
 
@@ -13,11 +13,16 @@ class RawShmBuffer(RawBufferMapped):
 
     if self.cache_id is not None and self.cache_id in SHM_CACHE: shm = SHM_CACHE[self.cache_id]
     else:
-      fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
-      # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
-      shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
-      shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
-      os.close(fd)
+      if OSX:
+        with open(f"/tmp/shm_{device}", "w+b") as f:
+          f.truncate(size * dtype.itemsize)
+          shm = mmap.mmap(f.fileno(), size * dtype.itemsize, flags=mmap.MAP_SHARED)
+      else:
+        fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
+        # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
+        shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
+        shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
+        os.close(fd)
 
     if self.cache_id is not None: SHM_CACHE[self.cache_id] = shm
     super().__init__(size, dtype, shm)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index bd247f92..fa4d9bfe 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -91,7 +91,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.realize().realized._copyin(x.numpy()) # type: ignore
+      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
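
Note (commentary, not part of the patch): the ops_shm.py change works around macOS, where the Linux-only mmap flag bits used above (0x2000 and 0x008000, i.e. MAP_LOCKED and MAP_POPULATE) and MADV_HUGEPAGE are unavailable, by mapping a plain file under /tmp instead of a POSIX shm segment. Below is a standalone sketch of that platform split using only the standard library; map_shared and the demo paths are illustrative names, and the Linux branch is simplified to create the object itself, whereas the real code opens a pre-existing segment via _posixshmem.shm_open.

import mmap, os, platform

def map_shared(name: str, nbytes: int) -> mmap.mmap:
  # Illustrative helper, not tinygrad API.
  if platform.system() == "Darwin":
    # macOS fallback from the patch: back the mapping with a plain file under /tmp.
    with open(f"/tmp/shm_{name}", "w+b") as f:
      f.truncate(nbytes)  # size the backing file before mapping it
      return mmap.mmap(f.fileno(), nbytes, flags=mmap.MAP_SHARED)
  # Linux path (simplified): create and size /dev/shm/<name>, then map it shared.
  fd = os.open(f"/dev/shm/{name}", os.O_CREAT | os.O_RDWR, 0o600)
  try:
    os.ftruncate(fd, nbytes)
    return mmap.mmap(fd, nbytes, flags=mmap.MAP_SHARED)
  finally:
    os.close(fd)  # the mmap keeps its own reference, as in the patched code

if __name__ == "__main__":
  m = map_shared("demo", 16)
  m[:4] = b"ping"       # writes are visible to any process mapping the same object
  print(bytes(m[:4]))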
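
Note (commentary, not part of the patch): test_shm_tensor.py imports multiprocessing.shared_memory, which is the stdlib way to create the named segment that RawShmBuffer later attaches to. A minimal producer-side sketch of that setup, using the same 2x2x2 shape as test_e2e; how tinygrad then consumes the segment by name is not shown here.

import multiprocessing.shared_memory as shared_memory
import numpy as np

data = np.random.randn(2, 2, 2).astype(np.float32)          # same shape as test_e2e
shm = shared_memory.SharedMemory(create=True, size=data.nbytes)
view = np.ndarray(data.shape, dtype=data.dtype, buffer=shm.buf)
view[:] = data[:]                       # copy the array into the shared segment
print("segment name:", shm.name)        # a consumer would attach by this name
del view                                # drop the buffer export before closing
shm.close()
shm.unlink()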
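
Note (commentary, not part of the patch): the tensor.py change inserts .contiguous() before realizing the DISK tensor's lazydata in assign, so _copyin targets realized, contiguous storage rather than a lazy view. The numpy snippet below is only an analogy for that view-versus-storage distinction, not tinygrad code.

import numpy as np

view = np.broadcast_to(np.float32(0), (4, 4))    # a zero-stride, read-only view
print(view.flags["C_CONTIGUOUS"], view.strides)  # False (0, 0)

real = np.ascontiguousarray(view)                # materialize real, writable storage
real[0, 0] = 1.0
print(real.flags["C_CONTIGUOUS"], real[0, 0])    # True 1.0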