remove rhip (#4579)

* remove rhip

* remove hip runner
Authored by nimlgen on 2024-05-14 17:58:19 +03:00; committed by GitHub
parent 5eb81ff764
commit 9b02aef45a
7 changed files with 14 additions and 18 deletions

View File

@@ -332,7 +332,7 @@ jobs:
strategy:
fail-fast: false
matrix:
-backend: [llvm, clang, gpu, cuda, hip, ptx, amd] #, triton]
+backend: [llvm, clang, gpu, cuda, ptx, amd] #, triton]
name: Tests on (${{ matrix.backend }})
runs-on: ubuntu-latest
@@ -356,7 +356,7 @@ jobs:
path: ~/.cache/tinygrad/downloads/
key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'hip' && 'RHIP=1\nFORWARD_ONLY=1' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
- name: Install OpenCL
if: matrix.backend == 'gpu'
run: |
@@ -398,8 +398,8 @@ jobs:
run: |
cd ${{ github.workspace }}/gpuocelot/ocelot/build
sudo ninja install -d explain
-- name: Install packages (hip)
-if: matrix.backend == 'hip' || matrix.backend == 'amd'
+- name: Install packages (amd)
+if: matrix.backend == 'amd'
run: |
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
@@ -416,7 +416,7 @@ jobs:
run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
- name: Check Device.DEFAULT and print some source
run: |
-PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','RHIP','AMD'], Device.DEFAULT"
+PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD'], Device.DEFAULT"
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Verify OpenCL autogen
if: matrix.backend == 'gpu'
@@ -433,8 +433,8 @@ jobs:
./autogen_stubs.sh nv
diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
-- name: Verify HIP autogen
-if: matrix.backend == 'hip'
+- name: Verify AMD autogen
+if: matrix.backend == 'amd'
run: |
cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
@@ -442,8 +442,8 @@ jobs:
./autogen_stubs.sh comgr
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
-- name: Run pytest (not cuda or hip/amd)
-if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'hip' && matrix.backend != 'amd'
+- name: Run pytest (not cuda or amd)
+if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd'
run: python -m pytest -n=auto test/ --durations=20
- name: Run ONNX (only LLVM)
if: matrix.backend == 'llvm'
@@ -451,9 +451,6 @@ jobs:
- name: Run pytest (cuda)
if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton'
run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --durations=20
-- name: Run pytest (hip)
-if: matrix.backend=='hip'
-run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hip_compile.py --durations=20
- name: Run pytest (amd)
if: matrix.backend=='amd'
run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hcq.py --durations=20
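The workflow above selects a backend purely through environment flags and then asserts that Device.DEFAULT picked it up. A minimal local sketch of that check, assuming a tinygrad checkout on PYTHONPATH and that AMD=1 is honored the same way the "Set env" step exports it (sketch only, not part of this commit):

    import os
    os.environ["AMD"] = "1"  # stand-in for the flag the workflow writes to $GITHUB_ENV
    from tinygrad import Device
    from tinygrad.helpers import getenv
    assert getenv("AMD") == 1                        # tinygrad reads backend selection from the environment
    assert Device.DEFAULT == "AMD", Device.DEFAULT   # mirrors the CI sanity check above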

View File

@@ -26,7 +26,7 @@ def assert_jit_cache_len(fxn, expected_len):
def is_dtype_supported(dtype: DType, device: str = Device.DEFAULT):
if dtype == dtypes.bfloat16:
# NOTE: this requires bf16 buffer support
return device in {"RHIP", "HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
return device in {"HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
if device in ["WEBGPU", "WEBGL"]: return dtype in [dtypes.float, dtypes.int32, dtypes.uint32]
if device == "CUDA" and getenv("PTX") and dtype in (dtypes.int8, dtypes.uint8): return False
# for CI GPU and OSX, cl_khr_fp16 isn't supported
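A small usage sketch of the helper above, guarding a bfloat16 test the same way existing tests do. The test class and the test.helpers import path are assumptions, not part of this commit:

    import unittest
    from tinygrad import Device, Tensor, dtypes
    from test.helpers import is_dtype_supported  # assumed location of the helper shown above

    class TestBF16Smoke(unittest.TestCase):  # hypothetical test, for illustration only
      def test_bf16_cast(self):
        if not is_dtype_supported(dtypes.bfloat16, Device.DEFAULT): self.skipTest("bf16 buffers not supported here")
        t = Tensor([1.0, 2.0]).cast(dtypes.bfloat16).realize()
        self.assertEqual(t.dtype, dtypes.bfloat16)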

View File

@@ -145,7 +145,7 @@ class TestDTypeALU(unittest.TestCase):
def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
# Metal and CUDACPU and HIP behave differently than numpy in CI for overflows
skip_overflow = CI and (Device.DEFAULT in {"RHIP", "HSA", "AMD"} or getenv("CUDACPU"))
skip_overflow = CI and (Device.DEFAULT in {"HSA", "AMD"} or getenv("CUDACPU"))
@given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
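The bounded-strategy pattern above, reduced to a standalone sketch. The file gates it on CI and the backend; here the bound is applied unconditionally, hypothesis is assumed installed, and the test class is illustrative only:

    import unittest
    from hypothesis import given, strategies as strat

    # same bounded float strategy the test above switches to when skip_overflow is set
    bounded = strat.floats(width=32, min_value=0, max_value=10.0)

    class TestBoundedFloats(unittest.TestCase):  # hypothetical test, for illustration only
      @given(bounded, bounded)
      def test_sum_stays_in_range(self, a, b):
        self.assertGreaterEqual(a + b, 0.0)  # bounds exclude NaN/inf, so these checks always hold
        self.assertLessEqual(a + b, 20.0)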

View File

@@ -292,7 +292,7 @@ class TestLinearizer(unittest.TestCase):
# check correctness
helper_tc_allclose(tc.dims[0]+pad, tc.dims[1]+pad, tc.dims[2]+pad, tc.dtype_in, tc.dtype_out, tc_opt=2)
@unittest.skipIf(CI and Device.DEFAULT in {"RHIP", "AMD"}, "RHIP/AMD CI is really slow here")
@unittest.skipIf(CI and Device.DEFAULT in {"AMD"}, "AMD CI is really slow here")
def test_tensor_cores_multi_reduce(self):
if not Device[Device.DEFAULT].renderer.has_tensor_cores:
self.skipTest("device doesn't have tensor cores")
@@ -852,7 +852,7 @@ class TestKernelOpts(unittest.TestCase):
], apply_tc=True, atol=atol, rtol=rtol)
def test_padto_matmul(self):
if CI and Device.DEFAULT in ["CUDA", "RHIP", "AMD"]: self.skipTest("super slow on CUDA and RHIP because of the big grid dims")
if CI and Device.DEFAULT in ["CUDA", "AMD"]: self.skipTest("super slow on CUDA and AMD because of the big grid dims")
N = 17 * 17
Tensor.manual_seed(289)
a = Tensor.rand(N, N)
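The CI-skip pattern used in both hunks above, as a standalone sketch. CI coming from tinygrad.helpers is an assumption, and the test body is a placeholder:

    import unittest
    from tinygrad import Device
    from tinygrad.helpers import CI  # assumed: truthy when the CI env var is set

    class TestHeavyKernels(unittest.TestCase):  # hypothetical example of the same skip style
      @unittest.skipIf(CI and Device.DEFAULT in {"CUDA", "AMD"}, "big grid dims are very slow on these CI backends")
      def test_placeholder(self):
        self.assertTrue(True)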

View File

@@ -104,7 +104,7 @@ class TestRandomness(unittest.TestCase):
self.assertTrue(equal_distribution(Tensor.randn, torch.randn, lambda x: np.random.randn(*x)))
@given(strat.sampled_from([dtypes.float, dtypes.float16, dtypes.bfloat16]))
@unittest.skipIf(Device.DEFAULT in ["HSA", "RHIP", "AMD"], "bfloat16 local buffer broken in HSA")
@unittest.skipIf(Device.DEFAULT in ["HSA", "AMD"], "bfloat16 local buffer broken in HSA")
def test_randn_finite(self, default_float):
if not is_dtype_supported(default_float): return
old_default_float = dtypes.default_float
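test_randn_finite saves dtypes.default_float before switching it. A sketch of that save/switch/restore shape; the try/finally wrapper is an assumption, while the attribute itself appears in the test above:

    from tinygrad import Tensor, dtypes

    old_default_float = dtypes.default_float
    try:
      dtypes.default_float = dtypes.float16  # new float tensors now default to half precision
      assert Tensor.randn(4).dtype == dtypes.float16
    finally:
      dtypes.default_float = old_default_float  # always restore the global default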

View File

@@ -284,7 +284,6 @@ class TestDiskTensor(unittest.TestCase):
ret = t.to("CLANG").bitcast(dtypes.uint16) + 1
assert ret.tolist() == [2827, 3341, 3855, 4369, 4883]
@unittest.skipIf(Device.DEFAULT == "RHIP", "no real HIP device exists in CI")
def test_bf16_disk_write_read(self):
t = Tensor([10000, -1, -1000, -10000, 20], dtype=dtypes.float32)
t.to(f"disk:{temp('f32')}").realize()