# tinygrad/test/test_uops.py

from typing import Optional, Tuple, Any, List
import unittest, math
import numpy as np
from tinygrad.helpers import dtypes, getenv, DType
from tinygrad.tensor import Device
from tinygrad.ops import UnaryOps, BinaryOps, TernaryOps, ASTRunner, Compiled
from tinygrad.codegen.linearizer import UOps, MemOp, UOp
from tinygrad.shape.symbolic import Variable
def _uops_to_prg(uops):
  """Render a uop list with the default device's renderer and build a runnable program.

  Some renderers return (src, global_size, local_size) while others also return a
  `binary` flag; the 3-tuple case is padded with binary=False so both are handled
  uniformly. Returns the built ASTRunner.
  """
  ret = Device[Device.DEFAULT].renderer("test", uops)
  # normalize the renderer's return to a 4-tuple
  src, global_size, local_size, binary = ret if len(ret) == 4 else ret + (False,)
  return ASTRunner("test", src, global_size, local_size, runtime_args={"binary": binary}).build(Device[Device.DEFAULT].runtime)
def uop(uops:List[UOp], uop:UOps, dtype:Optional[DType], vin:Tuple[UOp, ...], arg:Any=None) -> UOp:
  """Create a UOp numbered by its position, append it to `uops`, and return it."""
  new_op = UOp(uop, dtype, tuple(vin), arg, len(uops))
  uops.append(new_op)
  return new_op
def _test_single_value(vals, op, dtype):
  """Build a tiny kernel that loads each scalar in `vals` from its own input buffer,
  applies `op`, stores to an output buffer, runs it, and returns the scalar result."""
  uops = []
  # data0 is the output buffer; data1..dataN are one input buffer per value
  uop(uops, UOps.DEFINE_GLOBAL, None, (), ('data0', dtype))
  for i in range(len(vals)): uop(uops, UOps.DEFINE_GLOBAL, None, (), (f'data{i+1}', dtype))
  # NOTE: loads is a generator on purpose — the LOAD uops are only appended when
  # uop() materializes it via tuple(vin), so they are numbered just before the ALU uop
  loads = (uop(uops, UOps.LOAD, dtype, [], MemOp(f'data{i+1}', Variable.num(0), False, dtype, Variable.ands([]))) for i in range(len(vals)))
  alu = uop(uops, UOps.ALU, dtype, loads, op)
  uop(uops, UOps.STORE, None, (alu, ), MemOp('data0', Variable.num(0), False, dtype, Variable.ands([])))
  # 1-element output buffer, plus one 1-element input buffer per value
  buf = Device[Device.DEFAULT].buffer(1, dtype)
  buf2 = [Device[Device.DEFAULT].buffer.fromCPU(np.array([a], dtype=dtype.np)) for a in vals]
  prg = _uops_to_prg(uops)
  prg([buf]+buf2)
  return buf.toCPU()[0]
def _test_single_value_const(vals, op, dtype):
  """Like _test_single_value, but feeds `vals` as inline CONST uops instead of
  loading them from buffers — exercises the renderer's constant folding path."""
  uops = []
  # data0 is the only buffer: the output
  uop(uops, UOps.DEFINE_GLOBAL, None, (), ('data0', dtype))
  # NOTE: loads is a generator — the CONST uops are appended inside uop() when
  # tuple(vin) consumes it, so they are numbered just before the ALU uop
  loads = (uop(uops, UOps.CONST, dtype, [], a) for a in vals)
  alu = uop(uops, UOps.ALU, dtype, loads, op)
  uop(uops, UOps.STORE, None, (alu, ), MemOp('data0', Variable.num(0), False, dtype, Variable.ands([])))
  buf = Device[Device.DEFAULT].buffer(1, dtype)
  prg = _uops_to_prg(uops)
  prg([buf])
  return buf.toCPU()[0]
class TestUOps(unittest.TestCase):
  """Shared helpers: run an op on the device (both buffer-load and const paths)
  and compare against a reference python function."""
  def _equal(self, v1, v2):
    # two NaNs count as equal; anything else must match to ~5 decimal places
    if math.isnan(v1) and math.isnan(v2): return
    self.assertAlmostEqual(v1, v2, places=5)
  def _test_uop_fxn(self, bop, fxn, dt=dtypes.float32):
    for runner in (_test_single_value, _test_single_value_const):
      for a in (-2.0, 0.0, 1.0, 2.0):
        self._equal(runner([a], bop, dt), fxn(a))
  def _test_bop_fxn(self, bop, fxn, dt=dtypes.float32, no_b_zero=False):
    # b=0.0 is appended last unless the op can't handle it (div/mod)
    b_vals = [-3.0, 1.0, 3.0] if no_b_zero else [-3.0, 1.0, 3.0, 0.0]
    for runner in (_test_single_value, _test_single_value_const):
      for a in (-2.0, 0.0, 1.0, 2.0):
        for b in b_vals:
          self._equal(runner([a,b], bop, dt), fxn(a,b))
  def _test_top_fxn(self, bop, fxn, dt=dtypes.float32):
    for runner in (_test_single_value, _test_single_value_const):
      for a in (-2.0, 0, 1, 2.0):
        for b in (-3.0, 3.0):
          for c in (-4.0, 4.0):
            self._equal(runner([a,b,c], bop, dt), fxn(a,b,c))
@unittest.skipIf(not isinstance(Device[Device.DEFAULT], Compiled), "only test for compiled backends")
class TestFloatUOps(TestUOps):
  """float32 coverage for the unary/binary/ternary ALU ops."""
  def test_exp2(self): self._test_uop_fxn(UnaryOps.EXP2, np.exp2)
  def test_log2(self): self._test_uop_fxn(UnaryOps.LOG2, lambda a: math.log2(a) if a > 0 else (float('-inf') if a == 0 else float('nan')))
  def test_sin(self): self._test_uop_fxn(UnaryOps.SIN, math.sin)
  def test_sqrt(self): self._test_uop_fxn(UnaryOps.SQRT, lambda a: float('nan') if a < 0 else math.sqrt(a))
  # this is not on most backends
  #def test_recip(self): self._test_uop_fxn(UnaryOps.RECIP, lambda a: 1.0/a if a != 0 else float('inf'))
  def test_add(self): self._test_bop_fxn(BinaryOps.ADD, lambda a,b: a+b)
  def test_sub(self): self._test_bop_fxn(BinaryOps.SUB, lambda a,b: a-b)
  def test_mul(self): self._test_bop_fxn(BinaryOps.MUL, lambda a,b: a*b)
  def test_div(self): self._test_bop_fxn(BinaryOps.DIV, lambda a,b: a*float('inf') if b == 0 else a/b)
  def test_max(self): self._test_bop_fxn(BinaryOps.MAX, max)
  def test_cmplt(self): self._test_bop_fxn(BinaryOps.CMPLT, lambda a,b: float(a<b))
  # MOD isn't tested on floats
  def test_mulacc(self): self._test_top_fxn(TernaryOps.MULACC, lambda a,b,c: a*b+c)
  def test_where(self): self._test_top_fxn(TernaryOps.WHERE, lambda a,b,c: c if a == 0 else b)
# TODO: fix this on all the backends
@unittest.skipIf(not isinstance(Device[Device.DEFAULT], Compiled) or getenv('ARM64', False), "only test for compiled backends, broken on some")
class TestNonFloatUOps(TestUOps):
  """int32/bool coverage; references cast through int() to mimic integer ALU semantics."""
  def test_add_int32(self):
    self._test_bop_fxn(BinaryOps.ADD, lambda a,b: int(a) + int(b), dtypes.int32)
  def test_sub_int32(self):
    self._test_bop_fxn(BinaryOps.SUB, lambda a,b: int(a) - int(b), dtypes.int32)
  def test_mul_int32(self):
    self._test_bop_fxn(BinaryOps.MUL, lambda a,b: int(a) * int(b), dtypes.int32)
  def test_div_int32(self):
    # int(a/b) truncates toward zero, matching C integer division
    self._test_bop_fxn(BinaryOps.DIV, lambda a,b: int(a/b), dtypes.int32, no_b_zero=True)
  def test_mod_int32(self):
    # C-style remainder: magnitude of a%b with the sign of a
    self._test_bop_fxn(BinaryOps.MOD, lambda a,b: (-1 if a < 0 else 1) * (abs(int(a)) % abs(int(b))), dtypes.int32, no_b_zero=True)
  def test_cmplt_int32(self):
    self._test_bop_fxn(BinaryOps.CMPLT, lambda a,b: float(a<b), dtypes.int32)
  def test_mul_bool(self):
    self._test_bop_fxn(BinaryOps.MUL, lambda a,b: bool(a) and bool(b), dtypes.bool)
# allow running this file directly; verbosity=2 prints each test name as it runs
if __name__ == '__main__':
  unittest.main(verbosity=2)