mirror of https://github.com/commaai/tinygrad.git
parent 5eb81ff764
commit 9b02aef45a
@@ -332,7 +332,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        backend: [llvm, clang, gpu, cuda, hip, ptx, amd] #, triton]
+        backend: [llvm, clang, gpu, cuda, ptx, amd] #, triton]
 
     name: Tests on (${{ matrix.backend }})
     runs-on: ubuntu-latest
@@ -356,7 +356,7 @@ jobs:
           path: ~/.cache/tinygrad/downloads/
           key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
     - name: Set env
-      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'hip' && 'RHIP=1\nFORWARD_ONLY=1' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
+      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
     - name: Install OpenCL
       if: matrix.backend == 'gpu'
       run: |
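The chained && / || in the Set env step is GitHub Actions' ternary idiom: the first matching backend equality selects the block of KEY=VALUE lines that printf appends to $GITHUB_ENV, and dropping the 'hip' branch removes the RHIP=1 configuration entirely. A minimal Python sketch of the resulting per-backend mapping (a reconstruction for readability only, not code that exists in the repo):

# Illustrative mapping of matrix.backend -> env vars written to $GITHUB_ENV after this change.
BACKEND_ENV = {
    "llvm":   {"LLVM": "1"},
    "clang":  {"CLANG": "1"},
    "gpu":    {"GPU": "1"},
    "cuda":   {"FORWARD_ONLY": "1", "JIT": "1", "OPT": "2", "CUDA": "1", "CUDACPU": "1"},
    "ptx":    {"FORWARD_ONLY": "1", "JIT": "1", "OPT": "2", "CUDA": "1", "CUDACPU": "1", "PTX": "1"},
    "triton": {"FORWARD_ONLY": "1", "JIT": "1", "OPT": "2", "CUDA": "1", "CUDACPU": "1",
               "TRITON": "1", "TRITON_PTXAS_PATH": "/usr/bin/ptxas"},
    "amd":    {"AMD": "1", "MOCKGPU": "1", "FORWARD_ONLY": "1"},  # the removed 'hip' entry set RHIP=1 instead
}

def github_env_lines(backend: str) -> str:
    # mirrors what the printf expression appends to $GITHUB_ENV
    return "\n".join(f"{k}={v}" for k, v in BACKEND_ENV[backend].items())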
@@ -398,8 +398,8 @@ jobs:
       run: |
         cd ${{ github.workspace }}/gpuocelot/ocelot/build
         sudo ninja install -d explain
-    - name: Install packages (hip)
-      if: matrix.backend == 'hip' || matrix.backend == 'amd'
+    - name: Install packages (amd)
+      if: matrix.backend == 'amd'
       run: |
         echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
         wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
@@ -416,7 +416,7 @@ jobs:
       run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
     - name: Check Device.DEFAULT and print some source
       run: |
-        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','RHIP','AMD'], Device.DEFAULT"
+        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD'], Device.DEFAULT"
         DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
     - name: Verify OpenCL autogen
       if: matrix.backend == 'gpu'
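This check only passes because the env vars exported in the Set env step steer tinygrad's default device; with RHIP gone from both the env setup and the allowed list, AMD (with MOCKGPU=1) is the only remaining path for the AMD backend. A rough local equivalent of the same check, assuming a tinygrad checkout on PYTHONPATH:

# Sketch: reproduce the CI assertion for the 'amd' matrix entry on a local checkout.
import os
os.environ["AMD"] = "1"        # select the AMD backend, as the 'amd' matrix entry does
os.environ["MOCKGPU"] = "1"    # no real GPU in CI, so the mock driver path is used

from tinygrad import Device
assert Device.DEFAULT in ["LLVM", "CLANG", "CUDA", "GPU", "AMD"], Device.DEFAULT
print(Device.DEFAULT)          # expected here: AMD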
@@ -433,8 +433,8 @@ jobs:
         ./autogen_stubs.sh nv
         diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
         diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
-    - name: Verify HIP autogen
-      if: matrix.backend == 'hip'
+    - name: Verify AMD autogen
+      if: matrix.backend == 'amd'
       run: |
         cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
         cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
@@ -442,8 +442,8 @@ jobs:
         ./autogen_stubs.sh comgr
         diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
         diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
-    - name: Run pytest (not cuda or hip/amd)
-      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'hip' && matrix.backend != 'amd'
+    - name: Run pytest (not cuda or amd)
+      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd'
       run: python -m pytest -n=auto test/ --durations=20
     - name: Run ONNX (only LLVM)
       if: matrix.backend == 'llvm'
@@ -451,9 +451,6 @@ jobs:
     - name: Run pytest (cuda)
       if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton'
       run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --durations=20
-    - name: Run pytest (hip)
-      if: matrix.backend=='hip'
-      run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hip_compile.py --durations=20
     - name: Run pytest (amd)
       if: matrix.backend=='amd'
       run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hcq.py --durations=20

@@ -26,7 +26,7 @@ def assert_jit_cache_len(fxn, expected_len):
 def is_dtype_supported(dtype: DType, device: str = Device.DEFAULT):
   if dtype == dtypes.bfloat16:
     # NOTE: this requires bf16 buffer support
-    return device in {"RHIP", "HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
+    return device in {"HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
   if device in ["WEBGPU", "WEBGL"]: return dtype in [dtypes.float, dtypes.int32, dtypes.uint32]
   if device == "CUDA" and getenv("PTX") and dtype in (dtypes.int8, dtypes.uint8): return False
   # for CI GPU and OSX, cl_khr_fp16 isn't supported
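is_dtype_supported is the helper the test suite uses to gate dtype-specific tests; this hunk only drops RHIP from the set of devices that claim bf16 buffer support. A minimal usage sketch of the guard pattern (a hypothetical test, and the test.helpers import path is an assumption):

# Hypothetical guard built on the helper above; only the helper's signature comes from the hunk.
import unittest
from tinygrad import Device, Tensor, dtypes
from test.helpers import is_dtype_supported  # assumed location of the helper

class TestBF16Guard(unittest.TestCase):
  def test_bf16_roundtrip(self):
    if not is_dtype_supported(dtypes.bfloat16, Device.DEFAULT):
      self.skipTest("bf16 buffers not supported on this device")
    t = Tensor([1.0, 2.0, 3.0]).cast(dtypes.bfloat16)   # values exactly representable in bf16
    self.assertEqual(t.cast(dtypes.float32).numpy().tolist(), [1.0, 2.0, 3.0])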
@@ -145,7 +145,7 @@ class TestDTypeALU(unittest.TestCase):
   def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
 
   # Metal and CUDACPU and HIP behave differently than numpy in CI for overflows
-  skip_overflow = CI and (Device.DEFAULT in {"RHIP", "HSA", "AMD"} or getenv("CUDACPU"))
+  skip_overflow = CI and (Device.DEFAULT in {"HSA", "AMD"} or getenv("CUDACPU"))
   @given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
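The @given arguments are evaluated when the class body runs, so skip_overflow simply swaps in a narrow [0, 10] float range on backends whose overflow behavior differs from numpy in CI. A standalone sketch of the same conditional-strategy pattern (simplified, not tinygrad code; the names are made up):

# Standalone illustration of picking a hypothesis strategy based on the environment.
import numpy as np
from hypothesis import given, strategies as strat

SKIP_OVERFLOW = True  # stand-in for the CI/backend check used above
f32 = (strat.floats(width=32, min_value=0, max_value=10.0) if SKIP_OVERFLOW
       else strat.floats(width=32, allow_nan=False))

@given(f32, f32)
def test_add_commutes(a, b):
  # IEEE-754 addition is commutative, so this property holds for any generated pair
  assert np.float32(a) + np.float32(b) == np.float32(b) + np.float32(a)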
@@ -292,7 +292,7 @@ class TestLinearizer(unittest.TestCase):
       # check correctness
       helper_tc_allclose(tc.dims[0]+pad, tc.dims[1]+pad, tc.dims[2]+pad, tc.dtype_in, tc.dtype_out, tc_opt=2)
 
-  @unittest.skipIf(CI and Device.DEFAULT in {"RHIP", "AMD"}, "RHIP/AMD CI is really slow here")
+  @unittest.skipIf(CI and Device.DEFAULT in {"AMD"}, "AMD CI is really slow here")
   def test_tensor_cores_multi_reduce(self):
     if not Device[Device.DEFAULT].renderer.has_tensor_cores:
       self.skipTest("device doesn't have tensor cores")
@@ -852,7 +852,7 @@ class TestKernelOpts(unittest.TestCase):
     ], apply_tc=True, atol=atol, rtol=rtol)
 
   def test_padto_matmul(self):
-    if CI and Device.DEFAULT in ["CUDA", "RHIP", "AMD"]: self.skipTest("super slow on CUDA and RHIP because of the big grid dims")
+    if CI and Device.DEFAULT in ["CUDA", "AMD"]: self.skipTest("super slow on CUDA and AMD because of the big grid dims")
     N = 17 * 17
     Tensor.manual_seed(289)
     a = Tensor.rand(N, N)
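test_padto_matmul uses N = 17 * 17 = 289 precisely because it is not a friendly multiple of typical local sizes, which is what makes the padded kernels' grid dims large and the test slow on CUDA and AMD in CI. A rough sketch of the workload, under the same shapes (only the lines visible in the hunk are certain; the second operand, the matmul, and realize() are assumptions about the test body):

# Sketch of the matmul behind test_padto_matmul; an assumption beyond the visible lines.
from tinygrad import Tensor

N = 17 * 17                    # 289: awkward size that exercises PADTO-style padding
Tensor.manual_seed(289)
a = Tensor.rand(N, N)
b = Tensor.rand(N, N)          # assumed second operand
c = (a @ b).realize()          # assumed: the large grid dims here are why CI skips CUDA/AMD
print(c.shape)                 # (289, 289)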
@@ -104,7 +104,7 @@ class TestRandomness(unittest.TestCase):
     self.assertTrue(equal_distribution(Tensor.randn, torch.randn, lambda x: np.random.randn(*x)))
 
   @given(strat.sampled_from([dtypes.float, dtypes.float16, dtypes.bfloat16]))
-  @unittest.skipIf(Device.DEFAULT in ["HSA", "RHIP", "AMD"], "bfloat16 local buffer broken in HSA")
+  @unittest.skipIf(Device.DEFAULT in ["HSA", "AMD"], "bfloat16 local buffer broken in HSA")
   def test_randn_finite(self, default_float):
     if not is_dtype_supported(default_float): return
     old_default_float = dtypes.default_float
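test_randn_finite installs each sampled dtype as dtypes.default_float, runs randn, and restores the old default afterwards; the hunk only narrows the skip list. A hedged sketch of that save/swap/restore pattern (only the first line of the body is visible in the hunk, the rest is an assumption):

# Save/swap/restore of the global default float dtype, as the test's first line implies.
import numpy as np
from tinygrad import Tensor, dtypes

old_default_float = dtypes.default_float
dtypes.default_float = dtypes.float16        # e.g. one of the dtypes sampled by @given
try:
  t = Tensor.randn(256)
  assert np.isfinite(t.cast(dtypes.float32).numpy()).all()   # the "finite" property under test
finally:
  dtypes.default_float = old_default_float   # always restore the global default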
@@ -284,7 +284,6 @@ class TestDiskTensor(unittest.TestCase):
     ret = t.to("CLANG").bitcast(dtypes.uint16) + 1
     assert ret.tolist() == [2827, 3341, 3855, 4369, 4883]
 
-  @unittest.skipIf(Device.DEFAULT == "RHIP", "no real HIP device exists in CI")
   def test_bf16_disk_write_read(self):
     t = Tensor([10000, -1, -1000, -10000, 20], dtype=dtypes.float32)
     t.to(f"disk:{temp('f32')}").realize()
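The removed skipIf is the last RHIP reference in this file; test_bf16_disk_write_read itself writes float32 values to disk and reads them back as bfloat16, which amounts to keeping only the top 16 bits of each float32 bit pattern. A small numpy check of that round-trip (truncation rather than round-to-nearest is an assumption, used only to show the bit layout):

# bf16 as "upper half of float32": a worked example with the same values as the test.
import numpy as np

vals = np.array([10000, -1, -1000, -10000, 20], dtype=np.float32)
bf16_bits = (vals.view(np.uint32) >> 16).astype(np.uint16)        # keep sign + exponent + 7 mantissa bits
back = (bf16_bits.astype(np.uint32) << 16).view(np.float32)       # widen back to float32
print(back)   # close to the originals, e.g. 10000 -> 9984 after the dropped mantissa bits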