tinygrad/test/test_jit.py

#!/usr/bin/env python
import unittest
import numpy as np
from tinygrad.tensor import Tensor, Device
from tinygrad.jit import TinyJit, JIT_SUPPORTED_DEVICE
import pytest

# Module-level pytest mark: pytest applies `pytestmark` to every test collected from this file.
pytestmark = pytest.mark.webgpu

# NOTE: METAL fails, might be platform and optimization options dependent.
@unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and Device.DEFAULT not in ["METAL", "WEBGPU"], f"no JIT on {Device.DEFAULT}")
class TestJit(unittest.TestCase):
  """Tests for the TinyJit decorator.

  The whole class is skipped unless Device.DEFAULT is listed in JIT_SUPPORTED_DEVICE,
  and it is additionally skipped when the default device is METAL or WEBGPU.

  Checks use unittest assertion methods rather than bare `assert` so they are not
  stripped under `python -O` and report both operands on failure.
  """

  def test_simple_jit(self):
    """A jitted two-input add matches numpy on every call and leaves one entry in jit_cache."""
    @TinyJit
    def add(a, b): return (a+b).realize()
    for _ in range(5):
      a = Tensor.randn(10, 10)
      b = Tensor.randn(10, 10)
      c = add(a, b)
      np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)
    self.assertEqual(len(add.jit_cache), 1)

  def test_jit_multiple_outputs(self):
    """A jitted function returning three realized tensors yields three correct outputs and three cache entries."""
    @TinyJit
    def f(a, b): return (a+b).realize(), (a-b).realize(), (a*b).realize()
    for _ in range(5):
      a = Tensor.randn(10, 10)
      b = Tensor.randn(10, 10)
      c, d, e = f(a, b)
      np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)
      np.testing.assert_allclose(d.numpy(), a.numpy()-b.numpy(), atol=1e-4, rtol=1e-5)
      np.testing.assert_allclose(e.numpy(), a.numpy()*b.numpy(), atol=1e-4, rtol=1e-5)
    self.assertEqual(len(f.jit_cache), 3)

  def test_nothing_jitted(self):
    """Repeatedly calling a jitted function that never realizes anything is expected to raise AssertionError."""
    @TinyJit
    def add(a, b): return a+b
    with self.assertRaises(AssertionError):
      for _ in range(5):
        a = Tensor.randn(10, 10)
        b = Tensor.randn(10, 10)
        # the return value is irrelevant here; only the raised AssertionError is checked
        add(a, b)

  def test_jit_shape_mismatch(self):
    """After five calls with (10, 10) inputs, passing a (20, 20) tensor is expected to raise AssertionError."""
    @TinyJit
    def add(a, b): return (a+b).realize()
    for _ in range(5):
      a = Tensor.randn(10, 10)
      b = Tensor.randn(10, 10)
      add(a, b)
    bad = Tensor.randn(20, 20)
    with self.assertRaises(AssertionError):
      # `a` is the (10, 10) tensor left over from the last loop iteration
      add(a, bad)

  def test_jit_duplicate_fail(self):
    """Passing the same tensor for both arguments is expected to raise AssertionError."""
    # the jit doesn't support duplicate arguments
    @TinyJit
    def add(a, b): return (a+b).realize()
    a = Tensor.randn(10, 10)
    with self.assertRaises(AssertionError):
      add(a, a)

  def test_kwargs_jit(self):
    """Inputs passed as keyword arguments produce correct results and one cache entry."""
    @TinyJit
    def add_kwargs(first, second): return (first+second).realize()
    for _ in range(5):
      a = Tensor.randn(10, 10)
      b = Tensor.randn(10, 10)
      c = add_kwargs(first=a, second=b)
      np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)
    self.assertEqual(len(add_kwargs.jit_cache), 1)

  def test_array_jit(self):
    """A tensor passed inside a list gives correct results on the first two calls and wrong results afterwards."""
    @TinyJit
    def add_array(a, arr): return (a+arr[0]).realize()
    for i in range(5):
      a = Tensor.randn(10, 10)
      b = Tensor.randn(10, 10)
      a.realize()
      b.realize()
      c = add_array(a, [b])
      if i >= 2:
        # should fail once jitted since jit can't handle arrays:
        # at least one element must differ from the true sum
        self.assertTrue(np.any(np.not_equal(c.numpy(), a.numpy()+b.numpy())))
      else:
        np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)
    self.assertEqual(len(add_array.jit_cache), 1)

  def test_method_jit(self):
    """TinyJit applied to a method (`__call__`) that reads instance state works and leaves one cache entry."""
    class Fun:
      def __init__(self):
        self.a = Tensor.randn(10, 10)
      @TinyJit
      def __call__(self, b:Tensor) -> Tensor:
        return (self.a+b).realize()
    fun = Fun()
    for _ in range(5):
      b = Tensor.randn(10, 10)
      c = fun(b)
      np.testing.assert_allclose(c.numpy(), fun.a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)
    # NOTE(review): presumably `fun.__call__` is a partial-like wrapper whose `.func.__self__`
    # is the underlying TinyJit instance - confirm against TinyJit's descriptor implementation
    self.assertEqual(len(fun.__call__.func.__self__.jit_cache), 1)

  def test_jit_size1_input(self):
    """A one-element tensor input whose value changes on every call is handled correctly."""
    @TinyJit
    def f(a, b): return (a+b).realize()
    a = Tensor([1, 2, 3])
    for i in range(5):
      np.testing.assert_allclose(f(a, Tensor([i])).cpu().numpy(), (a+i).cpu().numpy(), atol=1e-4, rtol=1e-5)
    self.assertEqual(len(f.jit_cache), 1)

  def test_jit_output_non_tensor_fail(self):
    """Tensor outputs stay correct, while a non-Tensor passthrough output is expected NOT to track its input."""
    @TinyJit
    def f(a, b, i): return (a+b).realize(), i
    output1, output2 = [], []
    expect1, expect2 = [], []
    for i in range(5):
      a = Tensor.randn(10, 10)
      b = Tensor.randn(10, 10)
      o1, o2 = f(a, b, i)
      # copy the numpy results so each iteration's values are stored independently
      output1.append(o1.numpy().copy())
      output2.append(o2)
      expect1.append(a.numpy().copy()+b.numpy().copy())
      expect2.append(i)
    np.testing.assert_allclose(output1, expect1, atol=1e-4, rtol=1e-5)
    # the jit only works with Tensor outputs
    self.assertNotEqual(output2, expect2)
    self.assertEqual(len(f.jit_cache), 1)

# Allow running this file directly as a script via unittest's command-line runner.
if __name__ == '__main__':
  unittest.main()
the jit has a test 2023-02-12 02:04:03 +08:00			`#!/usr/bin/env python`
			`import unittest`
			`import numpy as np`
			`from tinygrad.tensor import Tensor, Device`
Enable JIT tests for supported devices, skip METAL and WEBGPU (#1265) * Enable JIT test * really test metal * Skip some device 2023-07-19 02:40:37 +08:00			`from tinygrad.jit import TinyJit, JIT_SUPPORTED_DEVICE`
CI < 5 minutes (#1252) * models matrix * fix typo and install gpu deps * install llvm deps if needed * fix * testops with cuda * remove pip cache since not work * cuda env * install cuda deps * maybe it will work now * i can't read * all tests in matrix * trim down more * opencl stuff in matrix * opencl pip cache * test split * change cuda test exclusion * test * fix cuda maybe * add models * add more n=auto * third thing * fix bug * cache pip more * change name * update tests * try again cause why not * balance * try again... * try apt cache for cuda * try on gpu: * try cuda again * update packages step * replace libz-dev with zlib1g-dev * only cache cuda * why error * fix gpuocelot bug * apt cache err * apt cache to slow? * opt and image in single runner * add a couple n=autos * remove test matrix * try cuda apt cache again * libz-dev -> zlib1g-dev * remove -s since not supported by xdist * the cache takes too long and doesn't work * combine webgpu and metal tests * combine imagenet to c and cpu tests * torch tests with linters * torch back by itself * small windows clang test with torch tests * fix a goofy windows bug * im dumb * bro * clang with linters * fix pylint error * linter not work on windows * try with clang again * clang and imagenet? * install deps * fix * fix quote * clang by itself (windows too slow) * env vars for imagenet * cache pip for metal and webgpu tests * try torch with metal and webgpu * doesn't work, too long * remove -v * try -n=logical * don't use logical * revert accidental thing * remove some prints unless CI * fix print unless CI * ignore speed tests for slow tests * clang windows in matrix (ubuntu being tested in imagenet->c test) * try manual pip cache * fix windows pip cache path * all manual pip cache * fix pip cache dir for macos * print_ci function in helpers * CI as variable, no print_ci * missed one * cuda tests with docker image * remove setup-python action for cuda * python->python3? 
* remove -s -v * try fix pip cache * maybe fix * try to fix pip cache * is this the path? * maybe cache pip * try again * create wheels dir * ? * cuda pip deps in dockerfile * disable pip cache for clang * image from ghcr instead of docker hub * why is clang like this * fast deps * try use different caches * remove the fast thing * try with lighter image * remove setup python for cuda * small docker and cuda fast deps * ignore a few more tests * cool docker thing (maybe) * oops * quotes * fix docker command * fix bug * ignore train efficientnet test * remove dockerfile (docker stuff takes too long) * remove docker stuff and normal cuda * oops * ignore the tests for cuda * does this work * ignore test_train on slow backends * add space * llvm ignore same tests as cuda * nvm * ignore lr scheduler tests * get some stats * fix ignore bug * remove extra ' * remove and * ignore test for llvm * change ignored tests and durationon all backends * fix * and -> or * ignore some more cuda tests * finally? * does this fix it * remove durations=0 * add some more tests to llvm * make last pytest more readable * fix * don't train efficientnet on cpu * try w/out pip cache * pip cache seems to be generally better * pytest file markers * try apt fast for cuda * use quick install for apt-fast * apt-fast not worth * apt-get to apt * fix typo * suppress warnings * register markers * disable debug on fuzz tests * change marker names * apt update and apt install in one command * update marker names in test.yml * webgpu pytest marker 2023-07-24 04:00:56 +08:00			`import pytest`

			`pytestmark = pytest.mark.webgpu`
the jit has a test 2023-02-12 02:04:03 +08:00
Enable JIT tests for supported devices, skip METAL and WEBGPU (#1265) * Enable JIT test * really test metal * Skip some device 2023-07-19 02:40:37 +08:00			`# NOTE: METAL fails, might be platform and optimization options dependent.`
			`@unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and Device.DEFAULT not in ["METAL", "WEBGPU"], f"no JIT on {Device.DEFAULT}")`
the jit has a test 2023-02-12 02:04:03 +08:00			`class TestJit(unittest.TestCase):`
			`def test_simple_jit(self):`
			`@TinyJit`
test speed v torch uses jit 2023-02-12 23:43:17 +08:00			`def add(a, b): return (a+b).realize()`
fix shapetracker test 2023-03-13 13:33:25 +08:00			`for _ in range(5):`
the jit has a test 2023-02-12 02:04:03 +08:00			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`c = add(a, b)`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`assert len(add.jit_cache) == 1`

			`def test_jit_multiple_outputs(self):`
			`@TinyJit`
			`def f(a, b): return (a+b).realize(), (a-b).realize(), (a*b).realize()`
			`for _ in range(5):`
			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`c, d, e = f(a, b)`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)`
			`np.testing.assert_allclose(d.numpy(), a.numpy()-b.numpy(), atol=1e-4, rtol=1e-5)`
			`np.testing.assert_allclose(e.numpy(), a.numpy()*b.numpy(), atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`assert len(f.jit_cache) == 3`

			`def test_nothing_jitted(self):`
			`@TinyJit`
			`def add(a, b): return a+b`
			`with self.assertRaises(AssertionError):`
			`for _ in range(5):`
			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`c = add(a, b)`
the jit has a test 2023-02-12 02:04:03 +08:00
assertions for jit 2023-05-06 12:56:32 +08:00			`def test_jit_shape_mismatch(self):`
			`@TinyJit`
			`def add(a, b): return (a+b).realize()`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`for _ in range(5):`
assertions for jit 2023-05-06 12:56:32 +08:00			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`c = add(a, b)`
			`bad = Tensor.randn(20, 20)`
			`with self.assertRaises(AssertionError):`
			`add(a, bad)`

			`def test_jit_duplicate_fail(self):`
			`# the jit doesn't support duplicate arguments`
			`@TinyJit`
			`def add(a, b): return (a+b).realize()`
			`a = Tensor.randn(10, 10)`
			`with self.assertRaises(AssertionError):`
			`add(a, a)`

the jit has a test 2023-02-12 02:04:03 +08:00			`def test_kwargs_jit(self):`
			`@TinyJit`
test speed v torch uses jit 2023-02-12 23:43:17 +08:00			`def add_kwargs(first, second): return (first+second).realize()`
fix shapetracker test 2023-03-13 13:33:25 +08:00			`for _ in range(5):`
the jit has a test 2023-02-12 02:04:03 +08:00			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`c = add_kwargs(first=a, second=b)`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`assert len(add_kwargs.jit_cache) == 1`
the jit has a test 2023-02-12 02:04:03 +08:00
			`def test_array_jit(self):`
			`@TinyJit`
test speed v torch uses jit 2023-02-12 23:43:17 +08:00			`def add_array(a, arr): return (a+arr[0]).realize()`
fix shapetracker test 2023-03-13 13:33:25 +08:00			`for i in range(5):`
the jit has a test 2023-02-12 02:04:03 +08:00			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`a.realize(), b.realize()`
test speed v torch uses jit 2023-02-12 23:43:17 +08:00			`c = add_array(a, [b])`
fix shapetracker test 2023-03-13 13:33:25 +08:00			`if i >= 2:`
the jit has a test 2023-02-12 02:04:03 +08:00			`# should fail once jitted since jit can't handle arrays`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(np.any(np.not_equal(c.numpy(),a.numpy()+b.numpy())), True, atol=1e-4, rtol=1e-5)`
the jit has a test 2023-02-12 02:04:03 +08:00			`else:`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(c.numpy(), a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`assert len(add_array.jit_cache) == 1`
the jit has a test 2023-02-12 02:04:03 +08:00
jittable llama 2023-03-13 05:15:04 +08:00			`def test_method_jit(self):`
			`class Fun:`
			`def __init__(self):`
			`self.a = Tensor.randn(10, 10)`
			`@TinyJit`
			`def __call__(self, b:Tensor) -> Tensor:`
			`return (self.a+b).realize()`
			`fun = Fun()`
			`for _ in range(5):`
			`b = Tensor.randn(10, 10)`
			`c = fun(b)`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(c.numpy(), fun.a.numpy()+b.numpy(), atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`assert len(fun.__call__.func.__self__.jit_cache) == 1`

			`def test_jit_size1_input(self):`
			`@TinyJit`
			`def f(a, b): return (a+b).realize()`
			`a = Tensor([1, 2, 3])`
			`for i in range(5):`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(f(a, Tensor([i])).cpu().numpy(), (a+i).cpu().numpy(), atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`assert len(f.jit_cache) == 1`

			`def test_jit_output_non_tensor_fail(self):`
			`@TinyJit`
			`def f(a, b, i): return (a+b).realize(), i`
			`output1, output2 = [], []`
			`expect1, expect2 = [], []`
			`for i in range(5):`
			`a = Tensor.randn(10, 10)`
			`b = Tensor.randn(10, 10)`
			`o1, o2 = f(a, b, i)`
			`output1.append(o1.numpy().copy())`
			`output2.append(o2)`
			`expect1.append(a.numpy().copy()+b.numpy().copy())`
			`expect2.append(i)`
use allclose instead of equals in test_jit (#1504) Closes #1503 2023-08-09 13:22:17 +08:00			`np.testing.assert_allclose(output1, expect1, atol=1e-4, rtol=1e-5)`
More JIT test cases (#1280) * More JIT test cases * test against jit_cache directly * remove unused 2023-07-20 01:45:43 +08:00			`# the jit only works with Tensor outputs`
			`assert output2 != expect2`
			`assert len(f.jit_cache) == 1`
jittable llama 2023-03-13 05:15:04 +08:00
the jit has a test 2023-02-12 02:04:03 +08:00			`if __name__ == '__main__':`
			`unittest.main()`