From 9dd64b1f5ff423c04723c43615089dab0e8632e9 Mon Sep 17 00:00:00 2001
From: Patrick Tsai <5304405+patosai@users.noreply.github.com>
Date: Tue, 20 Feb 2024 03:20:43 -0500
Subject: [PATCH] Fix python cast uint/int overflow (#3448)

* Fix numpy uint/int overflow

* lol

* Works

* Update

* Move overflow test to float64/float32

* One line

* Update

* One more

---------

Co-authored-by: Patrick Tsai
---
 test/test_dtype.py             | 22 ++++++++++++++++++++--
 tinygrad/runtime/ops_python.py |  6 +++---
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/test/test_dtype.py b/test/test_dtype.py
index a83a224d..bb270758 100644
--- a/test/test_dtype.py
+++ b/test/test_dtype.py
@@ -159,7 +159,12 @@ class TestBFloat16DType(unittest.TestCase):
 
 class TestHalfDtype(TestDType): DTYPE = dtypes.half
 
-class TestFloatDType(TestDType): DTYPE = dtypes.float
+class TestFloatDType(TestDType):
+  DTYPE = dtypes.float
+
+  def test_float_to_uint(self):
+    _test_op(lambda: Tensor([-0.9, -0.3, 1.2], dtype=dtypes.float32).cast(dtypes.uint32), dtypes.uint32,
+             [0, 0, 1])
 
 class TestDoubleDtype(TestDType):
   DTYPE = dtypes.double
@@ -181,12 +186,20 @@ class TestDoubleDtype(TestDType):
     a = [2, 3, 4]
     np.testing.assert_allclose(func(Tensor(a, dtype=self.DTYPE)).numpy(), func(torch.tensor(a, dtype=torch.float64)), rtol=1e-12, atol=1e-12)
 
+  def test_float64_to_float32_cast_inf(self):
+    _test_op(lambda: Tensor([3.4e40, 3.4e38, 1, 0], dtype=dtypes.float64).cast(dtypes.float32),
+             dtypes.float32, [float('inf'), 3.4e38, 1, 0])
+
+
 class TestInt8Dtype(TestDType):
   DTYPE = dtypes.int8
   @unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
   def test_int8_to_uint8_negative(self):
     _test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint8), dtypes.uint8, [255, 254, 253, 252])
 
+  def test_int8_to_uint16_negative(self):
+    _test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint16), dtypes.uint16, [2**16-1, 2**16-2, 2**16-3, 2**16-4])
+
 class TestUint8Dtype(TestDType):
   DTYPE = dtypes.uint8
   @unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
@@ -210,7 +223,12 @@ class TestBitCast(unittest.TestCase):
     assert b.numpy()[0,0] == 1.
 
 class TestInt16Dtype(TestDType): DTYPE = dtypes.int16
-class TestUint16Dtype(TestDType): DTYPE = dtypes.uint16
+
+class TestUint16Dtype(TestDType):
+  DTYPE = dtypes.uint16
+
+  def test_uint16_to_int8_overflow(self):
+    _test_op(lambda: Tensor([2**16-1, 2**16-2, 1, 0], dtype=dtypes.uint16).cast(dtypes.int8), dtypes.int8, [-1, -2, 1, 0])
 
 class TestInt32Dtype(TestDType): DTYPE = dtypes.int32
 class TestUint32Dtype(TestDType): DTYPE = dtypes.uint32
diff --git a/tinygrad/runtime/ops_python.py b/tinygrad/runtime/ops_python.py
index a8139315..540e3c79 100644
--- a/tinygrad/runtime/ops_python.py
+++ b/tinygrad/runtime/ops_python.py
@@ -134,9 +134,9 @@ class PythonProgram:
           ul[i] = list(struct.unpack(unpack_format, struct.pack(pack_format, *inp[0])))
         else:
           casted = [float(x) if dtypes.is_float(dtype) else int(x) if dtypes.is_int(dtype) else x for x in inp[0]]
-          packed = struct.pack(pack_format if (dtypes.is_int(dtype) and dtypes.is_int(dtp[0]) and dtype.itemsize == dtp[0].itemsize)
-                               else unpack_format, *casted)
-          ul[i] = list(struct.unpack(unpack_format, packed))
+          overflow_adjust = 2**(dtype.itemsize*8 - 1) if (dtypes.is_int(dtype) and not dtypes.is_unsigned(dtype)) else 0
+          overflow_fixed = [((x + overflow_adjust) % 2**(dtype.itemsize*8) - overflow_adjust) if dtypes.is_int(dtype) else x for x in casted]
+          ul[i] = list(struct.unpack(unpack_format, struct.pack(unpack_format, *overflow_fixed)))
       elif uop is UOps.LOAD:
         if isinstance(dtp[0], ImageDType):
           assert dtype.count == 4
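
Note: the new lines in ops_python.py emulate two's-complement wraparound with plain Python modular arithmetic before struct.pack, which would otherwise raise struct.error on out-of-range values. Below is a minimal standalone sketch of the same adjustment, assuming a hypothetical helper name wrap_int that does not appear in the patch:

    # Sketch only: reproduces the patch's overflow adjustment outside tinygrad.
    # wrap_int is a hypothetical helper, not part of this diff.
    import struct

    def wrap_int(x: int, nbytes: int, signed: bool) -> int:
        # Shift signed targets by 2**(bits-1) so the modulo lands in
        # [-(2**(bits-1)), 2**(bits-1)); unsigned targets wrap into [0, 2**bits).
        adjust = 2**(nbytes*8 - 1) if signed else 0
        return (x + adjust) % 2**(nbytes*8) - adjust

    assert wrap_int(-1, 2, signed=False) == 2**16 - 1      # int8 -1 cast to uint16 -> 65535
    assert wrap_int(2**16 - 1, 1, signed=True) == -1       # uint16 65535 cast to int8 -> -1
    struct.pack('b', wrap_int(2**16 - 1, 1, signed=True))  # value now in range; no struct.error

The two asserts mirror test_int8_to_uint16_negative and test_uint16_to_int8_overflow from the diff above.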