Consistent testing (#137)

* Consistent GPU classes

Convert the existing GPU classes into one standard format.

Remove duplicated functions in `test_mnist` and create a TestMNISTGPU
class. This reduces line count and ensures consistency.

Use `@unittest.skipUnless(GPU, "Requires GPU")` instead of `if GPU:` to
skip GPU testing. This will ensure that skipped tests are displayed
accordingly in the pytest output.

* Optim Testing now supports GPU

* Tensor testing now supports GPU

The jacobian and gradcheck tests are automatically skipped until GPU float64 support is added.

* GPU support for custom constructor methods

* Remove GPU flag from Model constructors

It was requested that the `gpu` kwarg be removed from the model
constructor. GPU conversion is now handled in the train function.

This also required converting the Optimizer parameters, as they are
constructed prior to execution of the `train` function and are dependent
on the model GPU state.

* Fix typo: float32->float64

* Clean `get_parameters` utility

Just a quick refactor with the new support for optimizers.

* Remove GPU kwarg from TinyNet

Remove the `gpu` kwarg from TinyNet to match the test_mnist `train` function.
This commit is contained in:
Liam 2020-12-09 11:25:27 +01:00 committed by GitHub
parent 34b38dd4d0
commit 89d0ff6989
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 63 deletions

View File

@ -22,6 +22,7 @@ X_train, Y_train, X_test, Y_test = fetch_mnist()
# create a model # create a model
class TinyBobNet: class TinyBobNet:
def __init__(self): def __init__(self):
self.l1 = Tensor.uniform(784, 128) self.l1 = Tensor.uniform(784, 128)
self.l2 = Tensor.uniform(128, 10) self.l2 = Tensor.uniform(128, 10)
@ -54,6 +55,7 @@ class TinyConvNet:
return x.dot(self.l1).logsoftmax() return x.dot(self.l1).logsoftmax()
def train(model, optim, steps, BS=128, gpu=False): def train(model, optim, steps, BS=128, gpu=False):
if gpu is True: [x.cuda_() for x in get_parameters([model, optim])]
losses, accuracies = [], [] losses, accuracies = [], []
for i in (t := trange(steps, disable=os.getenv('CI') is not None)): for i in (t := trange(steps, disable=os.getenv('CI') is not None)):
samp = np.random.randint(0, X_train.shape[0], size=(BS)) samp = np.random.randint(0, X_train.shape[0], size=(BS))
@ -94,53 +96,32 @@ def evaluate(model, gpu=False):
assert accuracy > 0.95 assert accuracy > 0.95
class TestMNIST(unittest.TestCase): class TestMNIST(unittest.TestCase):
@unittest.skipUnless(GPU, "Requires GPU") gpu=False
def test_conv_gpu(self):
np.random.seed(1337)
model = TinyConvNet()
[x.cuda_() for x in model.parameters()]
optimizer = optim.Adam(model.parameters(), lr=0.001)
train(model, optimizer, steps=200, gpu=True)
evaluate(model, gpu=True)
def test_conv(self): def test_conv(self):
np.random.seed(1337) np.random.seed(1337)
model = TinyConvNet() model = TinyConvNet()
optimizer = optim.Adam(model.parameters(), lr=0.001) optimizer = optim.Adam(model.parameters(), lr=0.001)
train(model, optimizer, steps=200) train(model, optimizer, steps=200, gpu=self.gpu)
evaluate(model) evaluate(model, gpu=self.gpu)
@unittest.skipUnless(GPU, "Requires GPU")
def test_sgd_gpu(self):
np.random.seed(1337)
model = TinyBobNet()
[x.cuda_() for x in model.parameters()]
optimizer = optim.SGD(model.parameters(), lr=0.001)
train(model, optimizer, steps=1000, gpu=True)
evaluate(model, gpu=True)
def test_sgd(self): def test_sgd(self):
np.random.seed(1337) np.random.seed(1337)
model = TinyBobNet() model = TinyBobNet()
optimizer = optim.SGD(model.parameters(), lr=0.001) optimizer = optim.SGD(model.parameters(), lr=0.001)
train(model, optimizer, steps=1000) train(model, optimizer, steps=1000, gpu=self.gpu)
evaluate(model) evaluate(model, gpu=self.gpu)
@unittest.skipUnless(GPU, "Requires GPU")
def test_rmsprop_gpu(self):
np.random.seed(1337)
model = TinyBobNet()
[x.cuda_() for x in model.parameters()]
optimizer = optim.RMSprop(model.parameters(), lr=0.0002)
train(model, optimizer, steps=1000, gpu=True)
evaluate(model, gpu=True)
def test_rmsprop(self): def test_rmsprop(self):
np.random.seed(1337) np.random.seed(1337)
model = TinyBobNet() model = TinyBobNet()
optimizer = optim.RMSprop(model.parameters(), lr=0.0002) optimizer = optim.RMSprop(model.parameters(), lr=0.0002)
train(model, optimizer, steps=1000) train(model, optimizer, steps=1000, gpu=self.gpu)
evaluate(model) evaluate(model, gpu=self.gpu)
@unittest.skipUnless(GPU, "Requires GPU")
class TestMNISTGPU(TestMNIST):
gpu = True
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -145,9 +145,9 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=ksz), lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=ksz),
lambda x: Tensor.avg_pool2d(x, kernel_size=ksz), gpu=self.gpu) lambda x: Tensor.avg_pool2d(x, kernel_size=ksz), gpu=self.gpu)
if GPU: @unittest.skipUnless(GPU, "Requires GPU")
class TestOpsGPU(TestOps): class TestOpsGPU(TestOps):
gpu = True gpu = True
if __name__ == '__main__': if __name__ == '__main__':
unittest.main(verbosity=2) unittest.main(verbosity=2)

View File

@ -1,20 +1,22 @@
import numpy as np import numpy as np
import torch import torch
import unittest import unittest
from tinygrad.tensor import Tensor from tinygrad.tensor import Tensor, GPU
from tinygrad.optim import Adam, SGD, RMSprop from tinygrad.optim import Adam, SGD, RMSprop
from tinygrad.utils import get_parameters
x_init = np.random.randn(1,3).astype(np.float32) x_init = np.random.randn(1,3).astype(np.float32)
W_init = np.random.randn(3,3).astype(np.float32) W_init = np.random.randn(3,3).astype(np.float32)
m_init = np.random.randn(1,3).astype(np.float32) m_init = np.random.randn(1,3).astype(np.float32)
def step_tinygrad(optim, kwargs={}): def step_tinygrad(optim, kwargs={}, gpu=False):
net = TinyNet() net = TinyNet()
optim = optim([net.x, net.W], **kwargs) optim = optim([net.x, net.W], **kwargs)
if gpu is True: [x.cuda_() for x in get_parameters([net, optim])]
out = net.forward() out = net.forward()
out.backward() out.backward()
optim.step() optim.step()
return net.x.data, net.W.data return net.x.cpu().data, net.W.cpu().data
def step_pytorch(optim, kwargs={}): def step_pytorch(optim, kwargs={}):
net = TorchNet() net = TorchNet()
@ -52,21 +54,29 @@ class TorchNet():
class TestOptim(unittest.TestCase): class TestOptim(unittest.TestCase):
gpu = False
def test_adam(self): def test_adam(self):
for x,y in zip(step_tinygrad(Adam), for x,y in zip(step_tinygrad(Adam, gpu=self.gpu),
step_pytorch(torch.optim.Adam)): step_pytorch(torch.optim.Adam)):
np.testing.assert_allclose(x, y, atol=1e-4) np.testing.assert_allclose(x, y, atol=1e-4)
def test_sgd(self): def test_sgd(self):
for x,y in zip(step_tinygrad(SGD, kwargs={'lr': 0.001}), for x,y in zip(step_tinygrad(SGD, kwargs={'lr': 0.001}, gpu=self.gpu),
step_pytorch(torch.optim.SGD, kwargs={'lr': 0.001})): step_pytorch(torch.optim.SGD, kwargs={'lr': 0.001})):
np.testing.assert_allclose(x, y, atol=1e-5) np.testing.assert_allclose(x, y, atol=1e-5)
def test_rmsprop(self): def test_rmsprop(self):
for x,y in zip(step_tinygrad(RMSprop, kwargs={'lr': 0.001, 'decay': 0.99}), for x,y in zip(step_tinygrad(RMSprop, kwargs={'lr': 0.001, 'decay': 0.99}, gpu=self.gpu),
step_pytorch(torch.optim.RMSprop, step_pytorch(torch.optim.RMSprop,
kwargs={'lr': 0.001, 'alpha': 0.99})): kwargs={'lr': 0.001, 'alpha': 0.99})):
np.testing.assert_allclose(x, y, atol=1e-5) np.testing.assert_allclose(x, y, atol=1e-5)
@unittest.skipUnless(GPU, "Requires GPU")
class TestOptimGPU(TestOptim):
gpu = True
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,7 +1,7 @@
import numpy as np import numpy as np
import torch import torch
import unittest import unittest
from tinygrad.tensor import Tensor from tinygrad.tensor import Tensor, GPU
from extra.gradcheck import numerical_jacobian, jacobian, gradcheck from extra.gradcheck import numerical_jacobian, jacobian, gradcheck
x_init = np.random.randn(1,3).astype(np.float32) x_init = np.random.randn(1,3).astype(np.float32)
@ -9,16 +9,18 @@ W_init = np.random.randn(3,3).astype(np.float32)
m_init = np.random.randn(1,3).astype(np.float32) m_init = np.random.randn(1,3).astype(np.float32)
class TestTinygrad(unittest.TestCase): class TestTinygrad(unittest.TestCase):
gpu = False
def test_backward_pass(self): def test_backward_pass(self):
def test_tinygrad(): def test_tinygrad():
x = Tensor(x_init) x = Tensor(x_init, gpu=self.gpu)
W = Tensor(W_init) W = Tensor(W_init, gpu=self.gpu)
m = Tensor(m_init) m = Tensor(m_init, gpu=self.gpu)
out = x.dot(W).relu() out = x.dot(W).relu()
out = out.logsoftmax() out = out.logsoftmax()
out = out.mul(m).add(m).sum() out = out.mul(m).add(m).sum()
out.backward() out.backward()
return out.data, x.grad.data, W.grad.data return out.cpu().data, x.grad.cpu().data, W.grad.cpu().data
def test_pytorch(): def test_pytorch():
x = torch.tensor(x_init, requires_grad=True) x = torch.tensor(x_init, requires_grad=True)
@ -42,8 +44,8 @@ class TestTinygrad(unittest.TestCase):
torch_func = lambda x: torch.nn.functional.log_softmax(x.matmul(torch_W).relu(), dim=1) torch_func = lambda x: torch.nn.functional.log_softmax(x.matmul(torch_W).relu(), dim=1)
PJ = torch.autograd.functional.jacobian(torch_func, torch_x).squeeze().numpy() PJ = torch.autograd.functional.jacobian(torch_func, torch_x).squeeze().numpy()
tiny_x = Tensor(x) tiny_x = Tensor(x, gpu=self.gpu)
tiny_W = Tensor(W) tiny_W = Tensor(W, gpu=self.gpu)
tiny_func = lambda x: x.dot(tiny_W).relu().logsoftmax() tiny_func = lambda x: x.dot(tiny_W).relu().logsoftmax()
J = jacobian(tiny_func, tiny_x) J = jacobian(tiny_func, tiny_x)
NJ = numerical_jacobian(tiny_func, tiny_x) NJ = numerical_jacobian(tiny_func, tiny_x)
@ -55,8 +57,8 @@ class TestTinygrad(unittest.TestCase):
W = np.random.RandomState(1337).random((10, 5)) W = np.random.RandomState(1337).random((10, 5))
x = np.random.RandomState(7331).random((1, 10)) - 0.5 x = np.random.RandomState(7331).random((1, 10)) - 0.5
tiny_x = Tensor(x) tiny_x = Tensor(x, gpu=self.gpu)
tiny_W = Tensor(W) tiny_W = Tensor(W, gpu=self.gpu)
tiny_func = lambda x: x.dot(tiny_W).relu().logsoftmax() tiny_func = lambda x: x.dot(tiny_W).relu().logsoftmax()
self.assertTrue(gradcheck(tiny_func, tiny_x)) self.assertTrue(gradcheck(tiny_func, tiny_x))
@ -64,5 +66,17 @@ class TestTinygrad(unittest.TestCase):
# coarse approx. since a "big" eps and the non-linearities of the model # coarse approx. since a "big" eps and the non-linearities of the model
self.assertFalse(gradcheck(tiny_func, tiny_x, eps = 0.1)) self.assertFalse(gradcheck(tiny_func, tiny_x, eps = 0.1))
@unittest.skipUnless(GPU, "Requires GPU")
class TestTinygradGPU(TestTinygrad):
gpu = True
@unittest.skip("float64 not supported on GPU")
def test_jacobian(self): pass
@unittest.skip("float64 not supported on GPU")
def test_gradcheck(self): pass
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -15,19 +15,15 @@ def fetch(url):
os.rename(fp+".tmp", fp) os.rename(fp+".tmp", fp)
return dat return dat
def get_parameters(model): def get_parameters(obj):
if isinstance(model, Tensor):
return [model]
parameters = [] parameters = []
if hasattr(model, '__dict__'): if isinstance(obj, Tensor):
for k,v in model.__dict__.items(): parameters.append(obj)
if isinstance(v, Tensor): elif isinstance(obj, list):
parameters.append(v) for x in obj:
elif isinstance(v, list): parameters.extend(get_parameters(x))
for x in v: elif hasattr(obj, '__dict__'):
parameters.extend(get_parameters(x)) for k,v in obj.__dict__.items():
elif hasattr(v, '__dict__'): parameters.extend(get_parameters(v))
parameters.extend(get_parameters(v))
#print(k, type(v))
return parameters return parameters