tinygrad/extra/gradcheck.py

import numpy as np
from tinygrad.tensor import Tensor

def mask_like(like, mask_inx, mask_value = 1.0):
  # flat mask: zeros shaped like `like`, with `mask_value` at flat index `mask_inx`
  mask = np.zeros_like(like).reshape(-1)
  mask[mask_inx] = mask_value
  return mask.reshape(like.shape)

def jacobian(func, input):
  output = func(input)

  ji = input.numpy().reshape(-1).shape[-1]
  jo = output.numpy().reshape(-1).shape[-1]
  J = np.zeros((jo,ji), dtype=np.float32)

  for o in range(jo):
    input.grad = None
    output = func(input)

    # tinygrad doesn't support slicing; as a tiny hack, mask out
    # the needed scalar and backpropagate only through it
    o_scalar = Tensor(mask_like(output.numpy(), o, 1.)).mul(output).sum()
    o_scalar.backward()

    # row o of the Jacobian: d(output[o]) / d(input)
    for i, grad in enumerate(input.grad.numpy().reshape(-1)):
      J[o,i] = grad
  return J

def numerical_jacobian(func, input, eps = 1e-3):
  output = func(input)

  ji = input.numpy().reshape(-1).shape[-1]
  jo = output.numpy().reshape(-1).shape[-1]
  NJ = np.zeros((jo, ji), dtype=np.float32)

  for i in range(ji):
    eps_perturb = mask_like(input.numpy(), i, mask_value = eps)

    output_perturb_add = func(Tensor(input.numpy() + eps_perturb)).numpy().reshape(-1)
    output_perturb_sub = func(Tensor(input.numpy() - eps_perturb)).numpy().reshape(-1)

    # central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
    grad_approx = (output_perturb_add - output_perturb_sub) / (2*eps)

    NJ[:,i] = grad_approx
  return NJ

def gradcheck(func, input, eps = 1e-3, atol = 1e-3, rtol = 1e-3):
  NJ = numerical_jacobian(func, input, eps)
  J = jacobian(func, input)
  return np.allclose(J, NJ, atol = atol, rtol = rtol)
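
# Example usage, a minimal sketch: `requires_grad=True` matches how tinygrad's
# own tests build inputs for gradcheck, though the Tensor constructor may
# differ across tinygrad versions.
if __name__ == "__main__":
  x = Tensor(np.random.randn(1, 4).astype(np.float32), requires_grad=True)
  f = lambda t: t.mul(t).sum()  # f(x) = sum(x^2), so df/dx = 2x
  print("gradcheck passed:", gradcheck(f, x))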