# tinygrad/test/test_mnist.py

#!/usr/bin/env python
import os
import unittest
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.utils import layer_init_uniform, fetch_mnist
import tinygrad.optim as optim
from tqdm import trange

# load the mnist dataset
# Runs once at import time; train() samples minibatches from X_train/Y_train and
# evaluate() scores the model against X_test/Y_test.
# NOTE(review): presumably X_* hold the images and Y_* the integer class labels -- confirm against fetch_mnist.
X_train, Y_train, X_test, Y_test = fetch_mnist()

# fully connected model: two weight tensors with a relu in between
class TinyBobNet:
  def __init__(self):
    # initialise both weight arrays first (same order as the layers), then wrap them
    hidden_weights = layer_init_uniform(784, 128)
    output_weights = layer_init_uniform(128, 10)
    self.l1 = Tensor(hidden_weights)
    self.l2 = Tensor(output_weights)

  def forward(self, x):
    # first layer followed by relu
    hidden = x.dot(self.l1).relu()
    # second layer, normalised with logsoftmax
    scores = hidden.dot(self.l2)
    return scores.logsoftmax()

# create a model with a conv layer
class TinyConvNet:
  def __init__(self):
    # architecture follows https://keras.io/examples/vision/mnist_convnet/
    conv = 3
    # the keras example uses inter_chan, out_chan = 32, 64; smaller values are used here for speed
    inter_chan, out_chan = 8, 16
    self.c1 = Tensor(layer_init_uniform(inter_chan,1,conv,conv))
    self.c2 = Tensor(layer_init_uniform(out_chan,inter_chan,conv,conv))
    # NOTE(review): the 5*5 is presumably the spatial size left after two conv+pool
    # stages on a 28x28 input (28 -> 26 -> 13 -> 11 -> 5) -- confirm against conv2d/max_pool2d.
    self.l1 = Tensor(layer_init_uniform(out_chan*5*5, 10))

  def forward(self, x):
    # View the input as (batch, 1, 28, 28) through a fresh Tensor rather than
    # overwriting x.data, so the tensor passed in by the caller is not mutated.
    x = Tensor(x.data.reshape((-1, 1, 28, 28)))
    x = x.conv2d(self.c1).relu().max_pool2d()
    x = x.conv2d(self.c2).relu().max_pool2d()
    # flatten everything except the batch dimension before the final dense layer
    x = x.reshape(Tensor(np.array((x.shape[0], -1))))
    return x.dot(self.l1).logsoftmax()

def train(model, optim, steps, BS=128):
  """Run `steps` minibatch updates of `model` on the module-level X_train/Y_train arrays.

  model: object exposing forward(Tensor) and returning a Tensor with 10 scores per row.
  optim: optimizer exposing step(); note that inside this function the name
         shadows the imported tinygrad.optim module.
  steps: number of minibatches to draw.
  BS:    minibatch size.

  Returns (losses, accuracies): the loss value and the training-batch accuracy
  recorded at every step, in order.
  """
  losses, accuracies = [], []
  # the progress bar is disabled whenever the CI environment variable is set
  for _ in (t := trange(steps, disable=os.getenv('CI') is not None)):
    # BS row indices drawn independently, so a row may repeat within a batch
    samp = np.random.randint(0, X_train.shape[0], size=BS)

    x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32))
    Y = Y_train[samp]
    y = np.zeros((len(samp),10), np.float32)
    # correct loss for NLL, torch NLL loss returns one per row
    # NOTE(review): presumably mean() averages over all BS*10 entries, hence -10 rather than -1 -- confirm.
    y[range(y.shape[0]),Y] = -10.0
    y = Tensor(y)

    # network
    out = model.forward(x)

    # NLL loss function
    loss = out.mul(y).mean()
    loss.backward()
    optim.step()

    # batch accuracy: highest-scoring column compared with the label
    cat = np.argmax(out.data, axis=1)
    accuracy = (cat == Y).mean()

    # printing
    loss = loss.data
    losses.append(loss)
    accuracies.append(accuracy)
    t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
  # hand the recorded history back instead of discarding it
  return losses, accuracies

def evaluate(model):
  """Score `model` on the module-level X_test/Y_test arrays.

  Prints the fraction of test rows whose highest-scoring column equals the
  label, then asserts that this fraction is above 0.95.
  """
  # one forward pass over the whole test set, flattened to 28*28 columns
  test_inputs = Tensor(X_test.reshape((-1, 28*28)).astype(np.float32))
  test_scores = model.forward(test_inputs)
  predicted = np.argmax(test_scores.data, axis=1)
  accuracy = (Y_test == predicted).mean()

  print("test set accuracy is %f" % accuracy)
  assert accuracy > 0.95

class TestMNIST(unittest.TestCase):
  # Shared driver for every test: seed numpy, build the model, build its
  # optimizer, train for `steps` minibatches, then check test-set accuracy.
  # The leading underscore keeps it from being collected as a test itself.
  def _fit_and_check(self, model_cls, make_optimizer, steps):
    np.random.seed(1337)
    net = model_cls()
    opt = make_optimizer(net)
    train(net, opt, steps=steps)
    evaluate(net)

  # conv net trained with Adam
  def test_conv(self):
    self._fit_and_check(
      TinyConvNet,
      lambda net: optim.Adam([net.c1, net.c2, net.l1], lr=0.001),
      400)

  # dense net trained with plain SGD
  def test_sgd(self):
    self._fit_and_check(
      TinyBobNet,
      lambda net: optim.SGD([net.l1, net.l2], lr=0.001),
      1000)

  # dense net trained with RMSprop
  def test_rmsprop(self):
    self._fit_and_check(
      TinyBobNet,
      lambda net: optim.RMSprop([net.l1, net.l2], lr=0.0002),
      1000)

# when the file is executed directly, hand control to unittest's command-line runner
if __name__ == '__main__':
  unittest.main()
add mnist example 2020-10-19 01:16:01 +08:00			`#!/usr/bin/env python`
add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00			`import os`
Make test_mnist runnable by pytest and directly 2020-10-22 00:30:08 +08:00			`import unittest`
add mnist example 2020-10-19 01:16:01 +08:00			`import numpy as np`
readme and dirs 2020-10-19 03:48:17 +08:00			`from tinygrad.tensor import Tensor`
refactor into utils 2020-10-19 05:36:29 +08:00			`from tinygrad.utils import layer_init_uniform, fetch_mnist`
update rsmprop 2020-10-23 20:46:45 +08:00			`import tinygrad.optim as optim`
add mnist example 2020-10-19 01:16:01 +08:00			`from tqdm import trange`

			`# load the mnist dataset`
refactor into a few files 2020-10-19 04:30:25 +08:00			`X_train, Y_train, X_test, Y_test = fetch_mnist()`
add mnist example 2020-10-19 01:16:01 +08:00
simple tests, repr not str 2020-10-19 05:55:20 +08:00			`# create a model`
update readme 2020-10-19 04:08:14 +08:00			`class TinyBobNet:`
			`def __init__(self):`
refactor into utils 2020-10-19 05:36:29 +08:00			`self.l1 = Tensor(layer_init_uniform(784, 128))`
			`self.l2 = Tensor(layer_init_uniform(128, 10))`
update readme 2020-10-19 04:08:14 +08:00
			`def forward(self, x):`
			`return x.dot(self.l1).relu().dot(self.l2).logsoftmax()`

add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00			`# create a model with a conv layer`
			`class TinyConvNet:`
			`def __init__(self):`
faster and better convnet 2020-10-26 04:48:44 +08:00			`# https://keras.io/examples/vision/mnist_convnet/`
			`conv = 3`
			`#inter_chan, out_chan = 32, 64`
			`inter_chan, out_chan = 8, 16 # for speed`
			`self.c1 = Tensor(layer_init_uniform(inter_chan,1,conv,conv))`
			`self.c2 = Tensor(layer_init_uniform(out_chan,inter_chan,conv,conv))`
			`self.l1 = Tensor(layer_init_uniform(out_chan*5*5, 10))`
add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00
			`def forward(self, x):`
			`x.data = x.data.reshape((-1, 1, 28, 28)) # hacks`
rename max_pool2d to match torch, remove more fast conv crap 2020-10-26 08:16:47 +08:00			`x = x.conv2d(self.c1).relu().max_pool2d()`
			`x = x.conv2d(self.c2).relu().max_pool2d()`
if you wait 24 seconds, that gets 98% 2020-10-23 12:49:14 +08:00			`x = x.reshape(Tensor(np.array((x.shape[0], -1))))`
faster and better convnet 2020-10-26 04:48:44 +08:00			`return x.dot(self.l1).logsoftmax()`
add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`def train(model, optim, steps, BS=128):`
			`losses, accuracies = [], []`
seems more generic 2020-10-26 07:40:37 +08:00			`for i in (t := trange(steps, disable=os.getenv('CI') is not None)):`
clean up test 2020-10-23 21:11:38 +08:00			`samp = np.random.randint(0, X_train.shape[0], size=(BS))`

			`x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32))`
			`Y = Y_train[samp]`
			`y = np.zeros((len(samp),10), np.float32)`
			`# correct loss for NLL, torch NLL loss returns one per row`
			`y[range(y.shape[0]),Y] = -10.0`
			`y = Tensor(y)`

			`# network`
			`out = model.forward(x)`
add mnist example 2020-10-19 01:16:01 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`# NLL loss function`
			`loss = out.mul(y).mean()`
			`loss.backward()`
			`optim.step()`

			`cat = np.argmax(out.data, axis=1)`
			`accuracy = (cat == Y).mean()`

			`# printing`
			`loss = loss.data`
			`losses.append(loss)`
			`accuracies.append(accuracy)`
			`t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))`
add mnist example 2020-10-19 01:16:01 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`def evaluate(model):`
			`def numpy_eval():`
			`Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32)))`
			`Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)`
			`return (Y_test == Y_test_preds).mean()`
readme and dirs 2020-10-19 03:48:17 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`accuracy = numpy_eval()`
			`print("test set accuracy is %f" % accuracy)`
			`assert accuracy > 0.95`
Make test_mnist runnable by pytest and directly 2020-10-22 00:30:08 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`class TestMNIST(unittest.TestCase):`
wow, you have to name them test 2020-10-23 21:33:18 +08:00			`def test_conv(self):`
clean up test 2020-10-23 21:11:38 +08:00			`np.random.seed(1337)`
all three 2020-10-23 17:53:01 +08:00			`model = TinyConvNet()`
faster and better convnet 2020-10-26 04:48:44 +08:00			`optimizer = optim.Adam([model.c1, model.c2, model.l1], lr=0.001)`
clean up test 2020-10-23 21:11:38 +08:00			`train(model, optimizer, steps=400)`
all three 2020-10-23 17:53:01 +08:00			`evaluate(model)`

wow, you have to name them test 2020-10-23 21:33:18 +08:00			`def test_sgd(self):`
clean up test 2020-10-23 21:11:38 +08:00			`np.random.seed(1337)`
all three 2020-10-23 17:53:01 +08:00			`model = TinyBobNet()`
update rsmprop 2020-10-23 20:46:45 +08:00			`optimizer = optim.SGD([model.l1, model.l2], lr=0.001)`
clean up test 2020-10-23 21:11:38 +08:00			`train(model, optimizer, steps=1000)`
all three 2020-10-23 17:53:01 +08:00			`evaluate(model)`

wow, you have to name them test 2020-10-23 21:33:18 +08:00			`def test_rmsprop(self):`
clean up test 2020-10-23 21:11:38 +08:00			`np.random.seed(1337)`
update rsmprop 2020-10-23 20:46:45 +08:00			`model = TinyBobNet()`
low lr improves rmsprop 2020-10-23 21:22:32 +08:00			`optimizer = optim.RMSprop([model.l1, model.l2], lr=0.0002)`
clean up test 2020-10-23 21:11:38 +08:00			`train(model, optimizer, steps=1000)`
all three 2020-10-23 17:53:01 +08:00			`evaluate(model)`
incorporate changes 2020-10-22 02:21:44 +08:00
Make test_mnist runnable by pytest and directly 2020-10-22 00:30:08 +08:00			`if __name__ == '__main__':`
			`unittest.main()`
clean up test 2020-10-23 21:11:38 +08:00