Add needed contiguous to DiskBuffer. SHM support on OSX (#1891)

* add some contiguous

* remove second contig

* Revert "remove second contig"

This reverts commit fc164f7dca1ad75b1e466e4e45a05eca58b7e0e0.

* shm on osx

* can repro bug

* don't contig zeros and ones
Author: George Hotz
Date: 2023-09-22 09:16:42 +08:00 (committed by GitHub)
Parent: d0e752003d
Commit: 78576915de
4 changed files with 15 additions and 11 deletions

View File

@@ -26,7 +26,7 @@ def run():
   # create a tensor to send
   t = Tensor.zeros(SIZE, SIZE) if rank == 0 else Tensor.ones(SIZE, SIZE)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test")
-  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy())
+  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   # reset jit
   allreduce_jit.cnt = 0
@@ -36,7 +36,7 @@ def run():
   # create a tensor to send
   t = Tensor.ones(SIZE_2, SIZE_2, SIZE_2) if rank == 0 else Tensor.zeros(SIZE_2, SIZE_2, SIZE_2)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test2")
-  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy())
+  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   print(f"rank {rank} passed")
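The only change in this file is the assert message: when a rank computes a wrong result, the offending values are printed instead of a bare AssertionError, which is what makes multi-process failures diagnosable from per-rank logs. A minimal sketch of the same pattern, assuming only numpy (the expect_allclose helper is hypothetical, not part of the test):

import numpy as np

def expect_allclose(expected, got, what="result"):
  # include the offending values in the failure so per-rank logs are self-explanatory
  assert np.allclose(expected, got), f"{what}: {got} wasn't {expected}"

expect_allclose(np.ones((2, 2)), np.ones((2, 2)), what="allreduce output")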

View File

@@ -1,11 +1,10 @@
 import unittest
 import multiprocessing.shared_memory as shared_memory
-from tinygrad.helpers import CI, OSX
+from tinygrad.helpers import CI
 from tinygrad.runtime.ops_shm import RawShmBuffer
 from tinygrad.tensor import Tensor, Device
 import numpy as np
 
-@unittest.skipIf(OSX, "no shm on OSX")
 class TestRawShmBuffer(unittest.TestCase):
   def test_e2e(self):
     t = Tensor.randn(2, 2, 2).realize()
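With an OSX code path in RawShmBuffer (next file), the blanket skipIf(OSX, ...) and the OSX import are no longer needed. The test imports Python's multiprocessing.shared_memory, the standard-library way to create a named shared-memory block that a shm-backed buffer can attach to. A stdlib-only sketch of creating, writing, and cleaning up such a block (the sizes and names are illustrative, not what the test uses):

import multiprocessing.shared_memory as shared_memory
import numpy as np

# create a named shared-memory block large enough for 2x2x2 float32 values
shm = shared_memory.SharedMemory(create=True, size=2 * 2 * 2 * 4)
a = np.ndarray((2, 2, 2), dtype=np.float32, buffer=shm.buf)  # view the block as an array
a[:] = np.random.randn(2, 2, 2).astype(np.float32)           # write through the view
print(shm.name)    # another process can attach with shared_memory.SharedMemory(name=shm.name)
del a              # drop the view so the mapping can be closed
shm.close()
shm.unlink()       # remove the block once no process needs it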

View File: tinygrad/runtime/ops_shm.py

@@ -2,7 +2,7 @@ import os, mmap
 try: import _posixshmem # type: ignore
 except Exception: pass
 from typing import Callable, Dict
-from tinygrad.helpers import DType
+from tinygrad.helpers import DType, OSX
 from tinygrad.runtime.lib import RawBufferMapped
 from tinygrad.ops import Interpreted, Op, UnaryOps, MovementOps
 
@@ -13,11 +13,16 @@ class RawShmBuffer(RawBufferMapped):
 
     if self.cache_id is not None and self.cache_id in SHM_CACHE: shm = SHM_CACHE[self.cache_id]
     else:
-      fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
-      # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
-      shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
-      shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
-      os.close(fd)
+      if OSX:
+        with open(f"/tmp/shm_{device}", "w+b") as f:
+          f.truncate(size * dtype.itemsize)
+          shm = mmap.mmap(f.fileno(), size * dtype.itemsize, flags=mmap.MAP_SHARED)
+      else:
+        fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
+        # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
+        shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
+        shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
+        os.close(fd)
 
     if self.cache_id is not None: SHM_CACHE[self.cache_id] = shm
     super().__init__(size, dtype, shm)
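This hunk is the core of the OSX support. On Linux the buffer still comes from POSIX shared memory: _posixshmem.shm_open plus an mmap whose hard-coded extra flags (0x2000 and 0x008000) appear to correspond to Linux's MAP_LOCKED and MAP_POPULATE, which the mmap module doesn't expose by name, followed by an madvise for huge pages. Those flags and MADV_HUGEPAGE don't exist on macOS, so the new branch sidesteps shm_open entirely and mmaps an ordinary file under /tmp with MAP_SHARED, truncated to the requested size. A self-contained sketch of the same two-path idea using only the standard library (the function name, the /dev/shm fallback, and the sizes are illustrative, not tinygrad's API):

import mmap, os, platform

def open_shared_buffer(name: str, nbytes: int) -> mmap.mmap:
  # sketch of the two-path mapping above; the /tmp naming mirrors the diff, the rest is illustrative
  if platform.system() == "Darwin":
    # macOS: back the mapping with a plain file truncated to the requested size
    with open(f"/tmp/shm_{name}", "w+b") as f:
      f.truncate(nbytes)
      return mmap.mmap(f.fileno(), nbytes, flags=mmap.MAP_SHARED)
  # Linux: a file under /dev/shm lives in tmpfs, the same backing store shm_open uses
  fd = os.open(f"/dev/shm/{name}", os.O_RDWR | os.O_CREAT, 0o600)
  try:
    os.ftruncate(fd, nbytes)
    return mmap.mmap(fd, nbytes, flags=mmap.MAP_SHARED)
  finally:
    os.close(fd)

buf = open_shared_buffer("demo", 4096)
buf[:5] = b"hello"   # any process mapping the same name sees this write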

View File: tinygrad/tensor.py

@@ -91,7 +91,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.realize().realized._copyin(x.numpy()) # type: ignore
+      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
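The DISK assign path pushes x's raw numpy bytes into the disk tensor's realized buffer, so the destination must be an actual materialized buffer rather than a lazy view; that is what the added .contiguous() before .realize() guarantees (the "needed contiguous to DiskBuffer" of the title). A loose numpy analogy of why a flat byte copy wants a contiguous array on both ends (an illustration, not tinygrad's internals):

import numpy as np

src = np.arange(16, dtype=np.float32).reshape(4, 4)
view = src.T                            # same data, non-contiguous strides
print(view.flags["C_CONTIGUOUS"])       # False

# a flat byte-for-byte copy only preserves the intended layout once the array is
# materialized contiguously; ascontiguousarray is numpy's analogue of .contiguous()
raw = np.ascontiguousarray(view).tobytes()
roundtrip = np.frombuffer(raw, dtype=np.float32).reshape(4, 4)
assert (roundtrip == view).all()        # the transposed values survive the raw copy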