diff --git a/test/test_tensor.py b/test/test_tensor.py
index 06391114..9a7455ea 100644
--- a/test/test_tensor.py
+++ b/test/test_tensor.py
@@ -64,6 +64,14 @@ class TestTinygrad(unittest.TestCase):
     for x,y in zip(test_tinygrad(), test_pytorch()):
       np.testing.assert_allclose(x, y, atol=1e-5)
 
+  def test_dropout(self):
+    Tensor.training = True
+    n, rate = 1_000_000, 0.1
+    w = Tensor.ones(n).dropout(rate)
+    non_zeros = np.count_nonzero(w.cpu().data)
+    expected = n * (1 - rate)
+    np.testing.assert_allclose(non_zeros, expected, rtol=1e-3)
+
   @unittest.skipUnless(not DEFAULT_DEVICE, "float64 not supported on GPU")
   def test_jacobian(self):
     W = np.random.RandomState(1337).random((10, 5))
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 1da79f3a..f9216682 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -267,7 +267,6 @@ class Tensor:
     return self - ss
 
   def dropout(self, p=0.5):
-    # TODO: this needs a test
     if Tensor.training:
       _mask = np.asarray(np.random.binomial(1, 1.0-p, size=self.shape), dtype=self.dtype)
      return self * Tensor(_mask, requires_grad=False, device=self.device) * (1/(1.0 - p))
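
For reference, a minimal NumPy sketch of the inverted-dropout logic this patch tests. The helper name inverted_dropout is hypothetical, not part of tinygrad; it mirrors the masking and 1/(1-p) rescaling in Tensor.dropout above, and shows why the new test can assert that roughly n * (1 - rate) elements survive.

import numpy as np

def inverted_dropout(x, p=0.5, training=True):
    # Hypothetical helper: at inference time dropout is a no-op.
    if not training:
        return x
    # Each element independently survives with probability 1 - p.
    mask = np.random.binomial(1, 1.0 - p, size=x.shape).astype(x.dtype)
    # Scale survivors by 1/(1 - p) so the expected output equals the input
    # (this is the "inverted" variant, matching the patched dropout).
    return x * mask * (1.0 / (1.0 - p))

x = np.ones(1_000_000, dtype=np.float32)
y = inverted_dropout(x, p=0.1)
# Survivor count is binomial with mean n*(1-p) and std ~sqrt(n*p*(1-p)) (~300 here),
# so the test's rtol=1e-3 (tolerance ~900) leaves roughly a 3-sigma margin.
print(np.count_nonzero(y) / x.size)  # ~0.9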