mirror of https://github.com/commaai/tinygrad.git
parent 5eb81ff764
commit 9b02aef45a
@@ -332,7 +332,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        backend: [llvm, clang, gpu, cuda, hip, ptx, amd] #, triton]
+        backend: [llvm, clang, gpu, cuda, ptx, amd] #, triton]
 
     name: Tests on (${{ matrix.backend }})
     runs-on: ubuntu-latest
@@ -356,7 +356,7 @@ jobs:
           path: ~/.cache/tinygrad/downloads/
           key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }}
     - name: Set env
-      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'hip' && 'RHIP=1\nFORWARD_ONLY=1' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
+      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
     - name: Install OpenCL
       if: matrix.backend == 'gpu'
       run: |
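The chained && / || in the Set env step is GitHub Actions' ternary idiom: the first matching backend equality selects the block of KEY=VALUE lines that printf appends to $GITHUB_ENV, and dropping the 'hip' branch removes the RHIP=1 configuration entirely. A minimal Python sketch of the resulting per-backend mapping (a reconstruction for readability only, not code that exists in the repo):

# Illustrative mapping of matrix.backend -> env vars written to $GITHUB_ENV after this change.
BACKEND_ENV = {
    "llvm":   {"LLVM": "1"},
    "clang":  {"CLANG": "1"},
    "gpu":    {"GPU": "1"},
    "cuda":   {"FORWARD_ONLY": "1", "JIT": "1", "OPT": "2", "CUDA": "1", "CUDACPU": "1"},
    "ptx":    {"FORWARD_ONLY": "1", "JIT": "1", "OPT": "2", "CUDA": "1", "CUDACPU": "1", "PTX": "1"},
    "triton": {"FORWARD_ONLY": "1", "JIT": "1", "OPT": "2", "CUDA": "1", "CUDACPU": "1",
               "TRITON": "1", "TRITON_PTXAS_PATH": "/usr/bin/ptxas"},
    "amd":    {"AMD": "1", "MOCKGPU": "1", "FORWARD_ONLY": "1"},  # the removed 'hip' entry set RHIP=1 instead
}

def github_env_lines(backend: str) -> str:
    # mirrors what the printf expression appends to $GITHUB_ENV
    return "\n".join(f"{k}={v}" for k, v in BACKEND_ENV[backend].items())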
@@ -398,8 +398,8 @@ jobs:
       run: |
         cd ${{ github.workspace }}/gpuocelot/ocelot/build
         sudo ninja install -d explain
-    - name: Install packages (hip)
-      if: matrix.backend == 'hip' || matrix.backend == 'amd'
+    - name: Install packages (amd)
+      if: matrix.backend == 'amd'
       run: |
         echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
         wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
@@ -416,7 +416,7 @@ jobs:
       run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/
     - name: Check Device.DEFAULT and print some source
       run: |
-        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','RHIP','AMD'], Device.DEFAULT"
+        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD'], Device.DEFAULT"
         DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
     - name: Verify OpenCL autogen
       if: matrix.backend == 'gpu'
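This check only passes because the env vars exported in the Set env step steer tinygrad's default device; with RHIP gone from both the env setup and the allowed list, AMD (with MOCKGPU=1) is the only remaining path for the AMD backend. A rough local equivalent of the same check, assuming a tinygrad checkout on PYTHONPATH:

# Sketch: reproduce the CI assertion for the 'amd' matrix entry on a local checkout.
import os
os.environ["AMD"] = "1"        # select the AMD backend, as the 'amd' matrix entry does
os.environ["MOCKGPU"] = "1"    # no real GPU in CI, so the mock driver path is used

from tinygrad import Device
assert Device.DEFAULT in ["LLVM", "CLANG", "CUDA", "GPU", "AMD"], Device.DEFAULT
print(Device.DEFAULT)          # expected here: AMD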
@@ -433,8 +433,8 @@ jobs:
         ./autogen_stubs.sh nv
         diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
         diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
-    - name: Verify HIP autogen
-      if: matrix.backend == 'hip'
+    - name: Verify AMD autogen
+      if: matrix.backend == 'amd'
       run: |
         cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
         cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
@@ -442,8 +442,8 @@ jobs:
         ./autogen_stubs.sh comgr
         diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
         diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
-    - name: Run pytest (not cuda or hip/amd)
-      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'hip' && matrix.backend != 'amd'
+    - name: Run pytest (not cuda or amd)
+      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd'
       run: python -m pytest -n=auto test/ --durations=20
     - name: Run ONNX (only LLVM)
       if: matrix.backend == 'llvm'
@@ -451,9 +451,6 @@ jobs:
     - name: Run pytest (cuda)
       if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton'
       run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --durations=20
-    - name: Run pytest (hip)
-      if: matrix.backend=='hip'
-      run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hip_compile.py --durations=20
     - name: Run pytest (amd)
       if: matrix.backend=='amd'
       run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hcq.py --durations=20

@@ -26,7 +26,7 @@ def assert_jit_cache_len(fxn, expected_len):
 def is_dtype_supported(dtype: DType, device: str = Device.DEFAULT):
   if dtype == dtypes.bfloat16:
     # NOTE: this requires bf16 buffer support
-    return device in {"RHIP", "HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
+    return device in {"HSA", "AMD"} or (device == "CUDA" and not CI and not getenv("PTX"))
   if device in ["WEBGPU", "WEBGL"]: return dtype in [dtypes.float, dtypes.int32, dtypes.uint32]
   if device == "CUDA" and getenv("PTX") and dtype in (dtypes.int8, dtypes.uint8): return False
   # for CI GPU and OSX, cl_khr_fp16 isn't supported
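is_dtype_supported is the helper the test suite uses to gate dtype-specific tests; this hunk only drops RHIP from the set of devices that claim bf16 buffer support. A minimal usage sketch of the guard pattern (a hypothetical test, and the test.helpers import path is an assumption):

# Hypothetical guard built on the helper above; only the helper's signature comes from the hunk.
import unittest
from tinygrad import Device, Tensor, dtypes
from test.helpers import is_dtype_supported  # assumed location of the helper

class TestBF16Guard(unittest.TestCase):
  def test_bf16_roundtrip(self):
    if not is_dtype_supported(dtypes.bfloat16, Device.DEFAULT):
      self.skipTest("bf16 buffers not supported on this device")
    t = Tensor([1.0, 2.0, 3.0]).cast(dtypes.bfloat16)   # values exactly representable in bf16
    self.assertEqual(t.cast(dtypes.float32).numpy().tolist(), [1.0, 2.0, 3.0])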
@@ -145,7 +145,7 @@ class TestDTypeALU(unittest.TestCase):
   def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
 
   # Metal and CUDACPU and HIP behave differently than numpy in CI for overflows
-  skip_overflow = CI and (Device.DEFAULT in {"RHIP", "HSA", "AMD"} or getenv("CUDACPU"))
+  skip_overflow = CI and (Device.DEFAULT in {"HSA", "AMD"} or getenv("CUDACPU"))
   @given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
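The @given arguments are evaluated when the class body runs, so skip_overflow simply swaps in a narrow [0, 10] float range on backends whose overflow behavior differs from numpy in CI. A standalone sketch of the same conditional-strategy pattern (simplified, not tinygrad code; the names are made up):

# Standalone illustration of picking a hypothesis strategy based on the environment.
import numpy as np
from hypothesis import given, strategies as strat

SKIP_OVERFLOW = True  # stand-in for the CI/backend check used above
f32 = (strat.floats(width=32, min_value=0, max_value=10.0) if SKIP_OVERFLOW
       else strat.floats(width=32, allow_nan=False))

@given(f32, f32)
def test_add_commutes(a, b):
  # IEEE-754 addition is commutative, so this property holds for any generated pair
  assert np.float32(a) + np.float32(b) == np.float32(b) + np.float32(a)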
@@ -292,7 +292,7 @@ class TestLinearizer(unittest.TestCase):
       # check correctness
       helper_tc_allclose(tc.dims[0]+pad, tc.dims[1]+pad, tc.dims[2]+pad, tc.dtype_in, tc.dtype_out, tc_opt=2)
 
-  @unittest.skipIf(CI and Device.DEFAULT in {"RHIP", "AMD"}, "RHIP/AMD CI is really slow here")
+  @unittest.skipIf(CI and Device.DEFAULT in {"AMD"}, "AMD CI is really slow here")
   def test_tensor_cores_multi_reduce(self):
     if not Device[Device.DEFAULT].renderer.has_tensor_cores:
       self.skipTest("device doesn't have tensor cores")
@@ -852,7 +852,7 @@ class TestKernelOpts(unittest.TestCase):
     ], apply_tc=True, atol=atol, rtol=rtol)
 
   def test_padto_matmul(self):
-    if CI and Device.DEFAULT in ["CUDA", "RHIP", "AMD"]: self.skipTest("super slow on CUDA and RHIP because of the big grid dims")
+    if CI and Device.DEFAULT in ["CUDA", "AMD"]: self.skipTest("super slow on CUDA and AMD because of the big grid dims")
     N = 17 * 17
     Tensor.manual_seed(289)
     a = Tensor.rand(N, N)
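test_padto_matmul uses N = 17 * 17 = 289 precisely because it is not a friendly multiple of typical local sizes, which is what makes the padded kernels' grid dims large and the test slow on CUDA and AMD in CI. A rough sketch of the workload, under the same shapes (only the lines visible in the hunk are certain; the second operand, the matmul, and realize() are assumptions about the test body):

# Sketch of the matmul behind test_padto_matmul; an assumption beyond the visible lines.
from tinygrad import Tensor

N = 17 * 17                    # 289: awkward size that exercises PADTO-style padding
Tensor.manual_seed(289)
a = Tensor.rand(N, N)
b = Tensor.rand(N, N)          # assumed second operand
c = (a @ b).realize()          # assumed: the large grid dims here are why CI skips CUDA/AMD
print(c.shape)                 # (289, 289)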
@@ -104,7 +104,7 @@ class TestRandomness(unittest.TestCase):
     self.assertTrue(equal_distribution(Tensor.randn, torch.randn, lambda x: np.random.randn(*x)))
 
   @given(strat.sampled_from([dtypes.float, dtypes.float16, dtypes.bfloat16]))
-  @unittest.skipIf(Device.DEFAULT in ["HSA", "RHIP", "AMD"], "bfloat16 local buffer broken in HSA")
+  @unittest.skipIf(Device.DEFAULT in ["HSA", "AMD"], "bfloat16 local buffer broken in HSA")
   def test_randn_finite(self, default_float):
     if not is_dtype_supported(default_float): return
     old_default_float = dtypes.default_float
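test_randn_finite installs each sampled dtype as dtypes.default_float, runs randn, and restores the old default afterwards; the hunk only narrows the skip list. A hedged sketch of that save/swap/restore pattern (only the first line of the body is visible in the hunk, the rest is an assumption):

# Save/swap/restore of the global default float dtype, as the test's first line implies.
import numpy as np
from tinygrad import Tensor, dtypes

old_default_float = dtypes.default_float
dtypes.default_float = dtypes.float16        # e.g. one of the dtypes sampled by @given
try:
  t = Tensor.randn(256)
  assert np.isfinite(t.cast(dtypes.float32).numpy()).all()   # the "finite" property under test
finally:
  dtypes.default_float = old_default_float   # always restore the global default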
@@ -284,7 +284,6 @@ class TestDiskTensor(unittest.TestCase):
     ret = t.to("CLANG").bitcast(dtypes.uint16) + 1
     assert ret.tolist() == [2827, 3341, 3855, 4369, 4883]
 
-  @unittest.skipIf(Device.DEFAULT == "RHIP", "no real HIP device exists in CI")
   def test_bf16_disk_write_read(self):
     t = Tensor([10000, -1, -1000, -10000, 20], dtype=dtypes.float32)
     t.to(f"disk:{temp('f32')}").realize()
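The removed skipIf is the last RHIP reference in this file; test_bf16_disk_write_read itself writes float32 values to disk and reads them back as bfloat16, which amounts to keeping only the top 16 bits of each float32 bit pattern. A small numpy check of that round-trip (truncation rather than round-to-nearest is an assumption, used only to show the bit layout):

# bf16 as "upper half of float32": a worked example with the same values as the test.
import numpy as np

vals = np.array([10000, -1, -1000, -10000, 20], dtype=np.float32)
bf16_bits = (vals.view(np.uint32) >> 16).astype(np.uint16)        # keep sign + exponent + 7 mantissa bits
back = (bf16_bits.astype(np.uint32) << 16).view(np.float32)       # widen back to float32
print(back)   # close to the originals, e.g. 10000 -> 9984 after the dropped mantissa bits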