From 78576915deaf9edd4a2572879429fcb231052f35 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Fri, 22 Sep 2023 09:16:42 +0800
Subject: [PATCH] Add needed contiguous to DiskBuffer. SHM support on OSX (#1891)

* add some contiguous

* remove second contig

* Revert "remove second contig"

This reverts commit fc164f7dca1ad75b1e466e4e45a05eca58b7e0e0.

* shm on osx

* can repro bug

* don't contig zeros and ones
---
 test/external/dist/test_collectives.py |  4 ++--
 test/unit/test_shm_tensor.py           |  3 +--
 tinygrad/runtime/ops_shm.py            | 17 +++++++++++------
 tinygrad/tensor.py                     |  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/test/external/dist/test_collectives.py b/test/external/dist/test_collectives.py
index 4d482950..2378a988 100644
--- a/test/external/dist/test_collectives.py
+++ b/test/external/dist/test_collectives.py
@@ -26,7 +26,7 @@ def run():
   # create a tensor to send
   t = Tensor.zeros(SIZE, SIZE) if rank == 0 else Tensor.ones(SIZE, SIZE)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test")
-  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy())
+  assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   # reset jit
   allreduce_jit.cnt = 0
@@ -36,7 +36,7 @@ def run():
   # create a tensor to send
   t = Tensor.ones(SIZE_2, SIZE_2, SIZE_2) if rank == 0 else Tensor.zeros(SIZE_2, SIZE_2, SIZE_2)
   t2 = allreduce_jit(t.contiguous().realize(), cache_id="test2")
-  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy())
+  assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy()), f"{t2.numpy()} wasn't ones"
 
   print(f"rank {rank} passed")
 
diff --git a/test/unit/test_shm_tensor.py b/test/unit/test_shm_tensor.py
index d8954b3e..77080660 100644
--- a/test/unit/test_shm_tensor.py
+++ b/test/unit/test_shm_tensor.py
@@ -1,11 +1,10 @@
 import unittest
 import multiprocessing.shared_memory as shared_memory
-from tinygrad.helpers import CI, OSX
+from tinygrad.helpers import CI
 from tinygrad.runtime.ops_shm import RawShmBuffer
 from tinygrad.tensor import Tensor, Device
 import numpy as np
 
-@unittest.skipIf(OSX, "no shm on OSX")
 class TestRawShmBuffer(unittest.TestCase):
   def test_e2e(self):
     t = Tensor.randn(2, 2, 2).realize()
diff --git a/tinygrad/runtime/ops_shm.py b/tinygrad/runtime/ops_shm.py
index dc38de92..79615dc0 100644
--- a/tinygrad/runtime/ops_shm.py
+++ b/tinygrad/runtime/ops_shm.py
@@ -2,7 +2,7 @@ import os, mmap
 try: import _posixshmem # type: ignore
 except Exception: pass
 from typing import Callable, Dict
-from tinygrad.helpers import DType
+from tinygrad.helpers import DType, OSX
 from tinygrad.runtime.lib import RawBufferMapped
 from tinygrad.ops import Interpreted, Op, UnaryOps, MovementOps
 
@@ -13,11 +13,16 @@ class RawShmBuffer(RawBufferMapped):
 
     if self.cache_id is not None and self.cache_id in SHM_CACHE: shm = SHM_CACHE[self.cache_id]
     else:
-      fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
-      # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
-      shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
-      shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
-      os.close(fd)
+      if OSX:
+        with open(f"/tmp/shm_{device}", "w+b") as f:
+          f.truncate(size * dtype.itemsize)
+          shm = mmap.mmap(f.fileno(), size * dtype.itemsize, flags=mmap.MAP_SHARED)
+      else:
+        fd = _posixshmem.shm_open(device, os.O_RDWR, 0o600)
+        # TODO: these flags are somewhat platform specific, but python doesn't expose the ones we need
+        shm = mmap.mmap(fd, size * dtype.itemsize, flags=mmap.MAP_SHARED | 0x2000 | 0x008000)
+        shm.madvise(mmap.MADV_HUGEPAGE) # type: ignore
+        os.close(fd)
 
     if self.cache_id is not None: SHM_CACHE[self.cache_id] = shm
     super().__init__(size, dtype, shm)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index bd247f92..fa4d9bfe 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -91,7 +91,7 @@ class Tensor:
     # TODO: this is a hack for writing to DISK
     if self.device.startswith("DISK"):
       if x.__class__ is not Tensor: x = Tensor(x, device="CPU", dtype=self.dtype)
-      self.lazydata.realize().realized._copyin(x.numpy()) # type: ignore
+      self.lazydata.contiguous().realize().realized._copyin(x.numpy()) # type: ignore
       return self
     if x.__class__ is not Tensor: x = Tensor(x, device=self.device, dtype=self.dtype)
     assert self.shape == x.shape and self.device == x.device, f"assign shape mismatch {self.shape} != {x.shape} or device mismatch {self.device} != {x.device}"
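
Note (commentary, not part of the patch): the ops_shm.py change works around macOS, where the Linux-only mmap flag bits used above (0x2000 and 0x008000, i.e. MAP_LOCKED and MAP_POPULATE) and MADV_HUGEPAGE are unavailable, by mapping a plain file under /tmp instead of a POSIX shm segment. Below is a standalone sketch of that platform split using only the standard library; map_shared and the demo paths are illustrative names, and the Linux branch is simplified to create the object itself, whereas the real code opens a pre-existing segment via _posixshmem.shm_open.

import mmap, os, platform

def map_shared(name: str, nbytes: int) -> mmap.mmap:
  # Illustrative helper, not tinygrad API.
  if platform.system() == "Darwin":
    # macOS fallback from the patch: back the mapping with a plain file under /tmp.
    with open(f"/tmp/shm_{name}", "w+b") as f:
      f.truncate(nbytes)  # size the backing file before mapping it
      return mmap.mmap(f.fileno(), nbytes, flags=mmap.MAP_SHARED)
  # Linux path (simplified): create and size /dev/shm/<name>, then map it shared.
  fd = os.open(f"/dev/shm/{name}", os.O_CREAT | os.O_RDWR, 0o600)
  try:
    os.ftruncate(fd, nbytes)
    return mmap.mmap(fd, nbytes, flags=mmap.MAP_SHARED)
  finally:
    os.close(fd)  # the mmap keeps its own reference, as in the patched code

if __name__ == "__main__":
  m = map_shared("demo", 16)
  m[:4] = b"ping"       # writes are visible to any process mapping the same object
  print(bytes(m[:4]))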
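
Note (commentary, not part of the patch): test_shm_tensor.py imports multiprocessing.shared_memory, which is the stdlib way to create the named segment that RawShmBuffer later attaches to. A minimal producer-side sketch of that setup, using the same 2x2x2 shape as test_e2e; how tinygrad then consumes the segment by name is not shown here.

import multiprocessing.shared_memory as shared_memory
import numpy as np

data = np.random.randn(2, 2, 2).astype(np.float32)          # same shape as test_e2e
shm = shared_memory.SharedMemory(create=True, size=data.nbytes)
view = np.ndarray(data.shape, dtype=data.dtype, buffer=shm.buf)
view[:] = data[:]                       # copy the array into the shared segment
print("segment name:", shm.name)        # a consumer would attach by this name
del view                                # drop the buffer export before closing
shm.close()
shm.unlink()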
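
Note (commentary, not part of the patch): the tensor.py change inserts .contiguous() before realizing the DISK tensor's lazydata in assign, so _copyin targets realized, contiguous storage rather than a lazy view. The numpy snippet below is only an analogy for that view-versus-storage distinction, not tinygrad code.

import numpy as np

view = np.broadcast_to(np.float32(0), (4, 4))    # a zero-stride, read-only view
print(view.flags["C_CONTIGUOUS"], view.strides)  # False (0, 0)

real = np.ascontiguousarray(view)                # materialize real, writable storage
real[0, 0] = 1.0
print(real.flags["C_CONTIGUOUS"], real[0, 0])    # True 1.0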