From 66d9eb10b6afd96839f601ab57a8b06dd6490cca Mon Sep 17 00:00:00 2001
From: chenyu
Date: Thu, 14 Dec 2023 17:53:00 -0500
Subject: [PATCH] arange default dtype to int and zeros/ones default to float
 (#2769)

---
 extra/onnx_ops.py  |  4 ++--
 test/test_dtype.py | 31 +++++++++++++++++++++++++++++++
 tinygrad/tensor.py | 12 +++++++-----
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/extra/onnx_ops.py b/extra/onnx_ops.py
index 67e0c02c..ceaf3bf0 100644
--- a/extra/onnx_ops.py
+++ b/extra/onnx_ops.py
@@ -477,8 +477,8 @@ def Resize(X:Tensor, roi=None, scales=None, sizes=None, antialias=0, axes=None,
   output_shape = sizes if sizes else [math.floor(x*s) for x,s in zip(X.shape, scales)]
   output_shape_ = sizes if sizes else [x*s for x,s in zip(X.shape, scales)]
   scales_lol = [os/xs for xs, os in zip(X.shape, output_shape)]
-  x_out = Tensor.arange(output_shape[-1])
-  y_out = Tensor.arange(output_shape[-2])
+  x_out = Tensor.arange(output_shape[-1]).cast(Tensor.default_type)
+  y_out = Tensor.arange(output_shape[-2]).cast(Tensor.default_type)
   if mode == "nearest":
     x_out, y_out = _coordinate_transformation(x_out, y_out, output_shape, scales_lol, roi)
     x_out = _nearest_mode(x_out, nearest_mode, X.shape[-1])
diff --git a/test/test_dtype.py b/test/test_dtype.py
index aaf364c2..40796d81 100644
--- a/test/test_dtype.py
+++ b/test/test_dtype.py
@@ -236,6 +236,37 @@ class TestTypeSpec(unittest.TestCase):
     assert Tensor.ones([2,3,0]).sum(2).dtype == Tensor.default_type
     # assert Tensor.ones([2,3,0], dtype=dtypes.int).sum(2).dtype == dtypes.int
+
+  def test_arange(self):
+    assert Tensor.arange(5).dtype == dtypes.int32
+    assert Tensor.arange(5.0).dtype == Tensor.default_type
+    assert Tensor.arange(5, dtype=dtypes.int16).dtype == dtypes.int16
+    assert Tensor.arange(5, dtype=dtypes.int64).dtype == dtypes.int64
+    assert Tensor.arange(5, dtype=dtypes.float16).dtype == dtypes.float16
+    assert Tensor.arange(3, 9, 0.7).dtype == Tensor.default_type
+    assert Tensor.arange(3, 8.5, 3).dtype == Tensor.default_type
+
+  def test_zeros(self):
+    assert Tensor.zeros(3, 3).dtype == Tensor.default_type
+    assert Tensor.zeros(3, 3, dtype=dtypes.float16).dtype == dtypes.float16
+    assert Tensor.zeros(3, 3, dtype=dtypes.int64).dtype == dtypes.int64
+
+  def test_ones(self):
+    assert Tensor.ones(3, 3).dtype == Tensor.default_type
+    assert Tensor.ones(3, 3, dtype=dtypes.float16).dtype == dtypes.float16
+    assert Tensor.ones(3, 3, dtype=dtypes.int64).dtype == dtypes.int64
+
+  def test_full(self):
+    assert Tensor.full((3, 3), 3).dtype == dtypes.int
+    assert Tensor.full((3, 3), 3.0).dtype == Tensor.default_type
+    assert Tensor.full((3, 3), 3, dtype=dtypes.float16).dtype == dtypes.float16
+    assert Tensor.full((3, 3), 3, dtype=dtypes.int64).dtype == dtypes.int64
+
+  def test_eye(self):
+    assert Tensor.eye(0).dtype == Tensor.default_type
+    assert Tensor.eye(3).dtype == Tensor.default_type
+    assert Tensor.eye(3, dtype=dtypes.float16).dtype == dtypes.float16
+    assert Tensor.eye(3, dtype=dtypes.int64).dtype == dtypes.int64
 core_types = list(DTYPES_DICT.values())
 
 class TestTypePromotion(unittest.TestCase):
   @given(st.sampled_from(core_types))
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 02bed595..784f77e5 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -169,22 +169,24 @@ class Tensor:
 
   @staticmethod
   def full(shape:Tuple[sint, ...], fill_value, **kwargs):
-    return Tensor(fill_value, **kwargs).reshape([1]*len(new_shape := argfix(shape))).expand(new_shape)
+    dtype = kwargs.pop("dtype", Tensor.default_type if isinstance(fill_value, float) else dtypes.int32)
+    return Tensor(fill_value, dtype=dtype, **kwargs).reshape([1]*len(new_shape := argfix(shape))).expand(new_shape)
 
   @staticmethod
-  def zeros(*shape, **kwargs): return Tensor.full(argfix(*shape), 0, **kwargs)
+  def zeros(*shape, **kwargs): return Tensor.full(argfix(*shape), 0.0, **kwargs)
 
   @staticmethod
-  def ones(*shape, **kwargs): return Tensor.full(argfix(*shape), 1, **kwargs)
+  def ones(*shape, **kwargs): return Tensor.full(argfix(*shape), 1.0, **kwargs)
 
   @staticmethod
   def arange(start, stop=None, step=1, **kwargs):
     if stop is None: stop, start = start, 0
-    return Tensor.full((math.ceil((stop-start)/step),), step, **kwargs).cumsum() + (start - step)
+    dtype = kwargs.pop("dtype", Tensor.default_type if any(isinstance(x, float) for x in (start, stop, step)) else dtypes.int32)
+    return Tensor.full((math.ceil((stop-start)/step),), step, dtype=dtype, **kwargs).cumsum() + (start - step)
 
   @staticmethod
   def eye(dim:int, **kwargs):
-    return Tensor.full((dim,1),1,**kwargs).pad(((0,0),(0,dim))).reshape(dim*(dim+1)).shrink(((0,dim*dim),)).reshape(dim, dim)
+    return Tensor.full((dim,1),1.0,**kwargs).pad((None,(0,dim))).reshape(dim*(dim+1)).shrink(((0,dim*dim),)).reshape(dim, dim)
 
   def full_like(self, fill_value, **kwargs):
     return Tensor.full(self.shape, fill_value=fill_value, dtype=kwargs.pop("dtype", self.dtype), device=kwargs.pop("device", self.device), **kwargs)