# tinygrad/test/test_mnist.py

#!/usr/bin/env python
import os
import unittest
import numpy as np
from tinygrad.tensor import Tensor, GPU
from tinygrad.utils import layer_init_uniform, fetch
import tinygrad.optim as optim
from tqdm import trange

# mnist loader
def fetch_mnist():
  """Download and decode the four MNIST archives.

  Each archive is obtained with fetch(url), which must return the raw gzip
  bytes, then decompressed and viewed as a flat uint8 array.

  Returns:
    (X_train, Y_train, X_test, Y_test) as uint8 numpy arrays. The image
    arrays are reshaped to (-1, 28, 28); the label arrays stay one-dimensional.
  """
  import gzip

  def load(url, skip):
    # frombuffer gives a view over the decompressed bytes; .copy() makes the
    # array own its data. The first `skip` bytes are dropped
    # (presumably the IDX file header -- 16 bytes for images, 8 for labels).
    payload = gzip.decompress(fetch(url))
    return np.frombuffer(payload, dtype=np.uint8).copy()[skip:]

  # same download order as always: train images, train labels, test images, test labels
  train_images = load("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", 0x10)
  train_labels = load("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", 8)
  test_images = load("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", 0x10)
  test_labels = load("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", 8)

  image_shape = (-1, 28, 28)
  return train_images.reshape(image_shape), train_labels, test_images.reshape(image_shape), test_labels

# load the mnist dataset once, at import time; train() and evaluate() below
# read these module-level arrays directly
X_train, Y_train, X_test, Y_test = fetch_mnist()

# create a model
class TinyBobNet:
  """Two-layer dense classifier: 784 inputs -> 128 hidden units -> 10 outputs."""

  def __init__(self):
    # build the weights in a fixed order (l1, then l2); the tests seed numpy
    # right before constructing the model, presumably so these draws repeat
    hidden_weights = layer_init_uniform(784, 128)
    self.l1 = Tensor(hidden_weights)
    output_weights = layer_init_uniform(128, 10)
    self.l2 = Tensor(output_weights)

  def parameters(self):
    """Return the weight tensors to optimise, as the list [l1, l2]."""
    weights = [self.l1, self.l2]
    return weights

  def forward(self, x):
    """Apply dot(l1) -> relu -> dot(l2) -> logsoftmax to `x` and return the result."""
    hidden = x.dot(self.l1).relu()
    scores = hidden.dot(self.l2)
    return scores.logsoftmax()

# create a model with a conv layer
class TinyConvNet:
  """Small conv classifier: two conv/relu/max-pool stages followed by one dense layer."""

  def __init__(self):
    # https://keras.io/examples/vision/mnist_convnet/
    kernel = 3
    # 32 and 64 channels is the larger alternative; 8 and 16 are used for speed
    mid_channels, out_channels = 8, 16
    self.c1 = Tensor(layer_init_uniform(mid_channels, 1, kernel, kernel))
    self.c2 = Tensor(layer_init_uniform(out_channels, mid_channels, kernel, kernel))
    # dense layer input is out_channels*5*5 (presumably a 5x5 map per channel
    # after the two conv + pool stages -- depends on conv2d/max_pool2d defaults)
    self.l1 = Tensor(layer_init_uniform(out_channels * 5 * 5, 10))

  def parameters(self):
    """Return the weight tensors to optimise, in the order [l1, c1, c2]."""
    weights = [self.l1, self.c1, self.c2]
    return weights

  def forward(self, x):
    """Run `x` through both conv stages and the dense layer; returns the logsoftmax output."""
    images = x.reshape(shape=(-1, 1, 28, 28)) # hacks
    features = images.conv2d(self.c1).relu().max_pool2d()
    features = features.conv2d(self.c2).relu().max_pool2d()
    # flatten everything except the leading (batch) dimension
    flat = features.reshape(shape=[features.shape[0], -1])
    return flat.dot(self.l1).logsoftmax()

def train(model, optim, steps, BS=128, gpu=False):
  """Train `model` on random mini-batches drawn from the module-level X_train / Y_train.

  Args:
    model: object exposing forward(x) that returns per-class log-probabilities.
    optim: optimizer exposing zero_grad() and step(). Note that inside this
      function the name shadows the imported `optim` module.
    steps: number of mini-batch updates to perform.
    BS: mini-batch size.
    gpu: forwarded to Tensor(..., gpu=gpu) for the inputs and the targets.

  Returns:
    (losses, accuracies): two lists with one entry per step, holding the loss
    value (loss.cpu().data) and the mini-batch accuracy. These were previously
    collected and then thrown away; callers that ignore the return value are
    unaffected.
  """
  losses, accuracies = [], []
  progress = trange(steps, disable=os.getenv('CI') is not None)
  for _ in progress:
    optim.zero_grad()
    # sample BS indices with replacement
    samp = np.random.randint(0, X_train.shape[0], size=BS)

    x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32), gpu=gpu)
    Y = Y_train[samp]
    y = np.zeros((len(samp), 10), np.float32)
    # correct loss for NLL, torch NLL loss returns one per row:
    # the target row is -10.0 at the true class, so that mean() over the
    # (BS, 10) product presumably equals the per-row mean of -log p(true class)
    y[range(y.shape[0]), Y] = -10.0
    y = Tensor(y, gpu=gpu)

    # network
    out = model.forward(x)

    # NLL loss function
    loss = out.mul(y).mean()
    loss.backward()
    optim.step()

    # mini-batch accuracy from the arg-max class
    cat = np.argmax(out.cpu().data, axis=1)
    accuracy = (cat == Y).mean()

    # bookkeeping and progress-bar text
    loss_value = loss.cpu().data
    losses.append(loss_value)
    accuracies.append(accuracy)
    progress.set_description("loss %.2f accuracy %.2f" % (loss_value, accuracy))

  return losses, accuracies

def evaluate(model, gpu=False):
  """Score `model` on the module-level X_test / Y_test and require > 95% accuracy.

  Args:
    model: object exposing forward(x); its output is moved to the CPU with
      .cpu() and the arg-max over axis 1 is taken as the predicted class.
    gpu: forwarded to Tensor(..., gpu=gpu) for the test inputs.

  Raises:
    AssertionError: if the accuracy is not above 0.95.
  """
  # the whole test set goes through the model in one forward pass, flattened to 784 columns
  test_inputs = Tensor(X_test.reshape((-1, 28*28)).astype(np.float32), gpu=gpu)
  scores = model.forward(test_inputs).cpu()
  predicted = np.argmax(scores.data, axis=1)
  accuracy = (Y_test == predicted).mean()

  print("test set accuracy is %f" % accuracy)
  assert accuracy > 0.95

class TestMNIST(unittest.TestCase):
  """End-to-end checks: train a small model on MNIST and require > 95% test accuracy."""

  def _fit_and_score(self, model_cls, make_optimizer, steps, gpu=False):
    """Seed numpy, build the model, train it for `steps` steps, then evaluate it.

    `make_optimizer` is called with model.parameters() and returns the optimizer.
    When `gpu` is true every parameter is moved with cuda_() before the
    optimizer is created.
    """
    np.random.seed(1337)
    model = model_cls()
    if gpu:
      for param in model.parameters():
        param.cuda_()
    optimizer = make_optimizer(model.parameters())
    train(model, optimizer, steps=steps, gpu=gpu)
    evaluate(model, gpu=gpu)

  @unittest.skipUnless(GPU, "Requires GPU")
  def test_conv_gpu(self):
    # conv net, SGD, 1000 steps, on the GPU
    self._fit_and_score(TinyConvNet, lambda params: optim.SGD(params, lr=0.001), steps=1000, gpu=True)

  def test_conv(self):
    # conv net, Adam, 200 steps
    self._fit_and_score(TinyConvNet, lambda params: optim.Adam(params, lr=0.001), steps=200)

  @unittest.skipUnless(GPU, "Requires GPU")
  def test_sgd_gpu(self):
    # dense net, SGD, 1000 steps, on the GPU
    self._fit_and_score(TinyBobNet, lambda params: optim.SGD(params, lr=0.001), steps=1000, gpu=True)

  def test_sgd(self):
    # dense net, SGD, 1000 steps
    self._fit_and_score(TinyBobNet, lambda params: optim.SGD(params, lr=0.001), steps=1000)

  def test_rmsprop(self):
    # dense net, RMSprop with a smaller learning rate, 1000 steps
    self._fit_and_score(TinyBobNet, lambda params: optim.RMSprop(params, lr=0.0002), steps=1000)

# allow running this file directly as a script, in addition to via a test runner
if __name__ == '__main__':
  unittest.main()
add mnist example 2020-10-19 01:16:01 +08:00			`#!/usr/bin/env python`
add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00			`import os`
Make test_mnist runnable by pytest and directly 2020-10-22 00:30:08 +08:00			`import unittest`
add mnist example 2020-10-19 01:16:01 +08:00			`import numpy as np`
test mnist on GPU 2020-11-01 23:46:17 +08:00			`from tinygrad.tensor import Tensor, GPU`
move the mnist loader out of tinygrad proper 2020-11-11 07:37:39 +08:00			`from tinygrad.utils import layer_init_uniform, fetch`
update rsmprop 2020-10-23 20:46:45 +08:00			`import tinygrad.optim as optim`
add mnist example 2020-10-19 01:16:01 +08:00			`from tqdm import trange`

move the mnist loader out of tinygrad proper 2020-11-11 07:37:39 +08:00			`# mnist loader`
			`def fetch_mnist():`
			`import gzip`
			`parse = lambda dat: np.frombuffer(gzip.decompress(dat), dtype=np.uint8).copy()`
			`X_train = parse(fetch("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"))[0x10:].reshape((-1, 28, 28))`
			`Y_train = parse(fetch("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz"))[8:]`
			`X_test = parse(fetch("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz"))[0x10:].reshape((-1, 28, 28))`
			`Y_test = parse(fetch("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz"))[8:]`
			`return X_train, Y_train, X_test, Y_test`

add mnist example 2020-10-19 01:16:01 +08:00			`# load the mnist dataset`
refactor into a few files 2020-10-19 04:30:25 +08:00			`X_train, Y_train, X_test, Y_test = fetch_mnist()`
add mnist example 2020-10-19 01:16:01 +08:00
simple tests, repr not str 2020-10-19 05:55:20 +08:00			`# create a model`
update readme 2020-10-19 04:08:14 +08:00			`class TinyBobNet:`
			`def __init__(self):`
refactor into utils 2020-10-19 05:36:29 +08:00			`self.l1 = Tensor(layer_init_uniform(784, 128))`
			`self.l2 = Tensor(layer_init_uniform(128, 10))`
update readme 2020-10-19 04:08:14 +08:00
parameters, and start on efficientnet 2020-10-27 23:53:35 +08:00			`def parameters(self):`
			`return [self.l1, self.l2]`

update readme 2020-10-19 04:08:14 +08:00			`def forward(self, x):`
			`return x.dot(self.l1).relu().dot(self.l2).logsoftmax()`

add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00			`# create a model with a conv layer`
			`class TinyConvNet:`
			`def __init__(self):`
faster and better convnet 2020-10-26 04:48:44 +08:00			`# https://keras.io/examples/vision/mnist_convnet/`
			`conv = 3`
			`#inter_chan, out_chan = 32, 64`
			`inter_chan, out_chan = 8, 16 # for speed`
			`self.c1 = Tensor(layer_init_uniform(inter_chan,1,conv,conv))`
			`self.c2 = Tensor(layer_init_uniform(out_chan,inter_chan,conv,conv))`
			`self.l1 = Tensor(layer_init_uniform(out_chan*5*5, 10))`
add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00
parameters, and start on efficientnet 2020-10-27 23:53:35 +08:00			`def parameters(self):`
			`return [self.l1, self.c1, self.c2]`

add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00			`def forward(self, x):`
getting convs to work on gpu 2020-11-08 01:17:57 +08:00			`x = x.reshape(shape=(-1, 1, 28, 28)) # hacks`
rename max_pool2d to match torch, remove more fast conv crap 2020-10-26 08:16:47 +08:00			`x = x.conv2d(self.c1).relu().max_pool2d()`
			`x = x.conv2d(self.c2).relu().max_pool2d()`
shape has to be a kwarg now, idk why this didn't break before 2020-10-29 23:13:05 +08:00			`x = x.reshape(shape=[x.shape[0], -1])`
faster and better convnet 2020-10-26 04:48:44 +08:00			`return x.dot(self.l1).logsoftmax()`
add reshape support and OMG the CONVS are SO SLOW 2020-10-22 00:12:19 +08:00
test mnist on GPU 2020-11-01 23:46:17 +08:00			`def train(model, optim, steps, BS=128, gpu=False):`
clean up test 2020-10-23 21:11:38 +08:00			`losses, accuracies = [], []`
seems more generic 2020-10-26 07:40:37 +08:00			`for i in (t := trange(steps, disable=os.getenv('CI') is not None)):`
Topological sort, zero_grads (#119) * Topological sort, zero_grads * Bug fix, add test * Add zero_grads * Put deepwalk function in backward * Move zero_grad to optim * Fix gradcheck hack Co-authored-by: holonomicjl <58403584+holonomicjl@users.noreply.github.com> 2020-11-16 12:25:29 +08:00			`optim.zero_grad()`
clean up test 2020-10-23 21:11:38 +08:00			`samp = np.random.randint(0, X_train.shape[0], size=(BS))`
GPU MaxPool2D.backward(); TinyConvNet train passes (#103) * no trailing whitespace * GPU MaxPool2D.backward(); TinyConvNet train passes! * Fix GPU avgpool.forward() init_val Doesn’t change result but is simpler. * Fix MaxPool GPU init_val Tests only cover random non-negative inputs. This fixes issues if negative inputs are fed to GPU MaxPool2D. Test update to follow. 2020-11-11 23:58:43 +08:00
test mnist on GPU 2020-11-01 23:46:17 +08:00			`x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32), gpu=gpu)`
clean up test 2020-10-23 21:11:38 +08:00			`Y = Y_train[samp]`
			`y = np.zeros((len(samp),10), np.float32)`
			`# correct loss for NLL, torch NLL loss returns one per row`
			`y[range(y.shape[0]),Y] = -10.0`
test mnist on GPU 2020-11-01 23:46:17 +08:00			`y = Tensor(y, gpu=gpu)`
GPU MaxPool2D.backward(); TinyConvNet train passes (#103) * no trailing whitespace * GPU MaxPool2D.backward(); TinyConvNet train passes! * Fix GPU avgpool.forward() init_val Doesn’t change result but is simpler. * Fix MaxPool GPU init_val Tests only cover random non-negative inputs. This fixes issues if negative inputs are fed to GPU MaxPool2D. Test update to follow. 2020-11-11 23:58:43 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`# network`
			`out = model.forward(x)`
add mnist example 2020-10-19 01:16:01 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`# NLL loss function`
			`loss = out.mul(y).mean()`
			`loss.backward()`
			`optim.step()`
GPU MaxPool2D.backward(); TinyConvNet train passes (#103) * no trailing whitespace * GPU MaxPool2D.backward(); TinyConvNet train passes! * Fix GPU avgpool.forward() init_val Doesn’t change result but is simpler. * Fix MaxPool GPU init_val Tests only cover random non-negative inputs. This fixes issues if negative inputs are fed to GPU MaxPool2D. Test update to follow. 2020-11-11 23:58:43 +08:00
test mnist on GPU 2020-11-01 23:46:17 +08:00			`cat = np.argmax(out.cpu().data, axis=1)`
clean up test 2020-10-23 21:11:38 +08:00			`accuracy = (cat == Y).mean()`
GPU MaxPool2D.backward(); TinyConvNet train passes (#103) * no trailing whitespace * GPU MaxPool2D.backward(); TinyConvNet train passes! * Fix GPU avgpool.forward() init_val Doesn’t change result but is simpler. * Fix MaxPool GPU init_val Tests only cover random non-negative inputs. This fixes issues if negative inputs are fed to GPU MaxPool2D. Test update to follow. 2020-11-11 23:58:43 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`# printing`
test mnist on GPU 2020-11-01 23:46:17 +08:00			`loss = loss.cpu().data`
clean up test 2020-10-23 21:11:38 +08:00			`losses.append(loss)`
			`accuracies.append(accuracy)`
			`t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))`
add mnist example 2020-10-19 01:16:01 +08:00
test mnist on GPU 2020-11-01 23:46:17 +08:00			`def evaluate(model, gpu=False):`
clean up test 2020-10-23 21:11:38 +08:00			`def numpy_eval():`
test mnist on GPU 2020-11-01 23:46:17 +08:00			`Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32), gpu=gpu)).cpu()`
clean up test 2020-10-23 21:11:38 +08:00			`Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)`
			`return (Y_test == Y_test_preds).mean()`
readme and dirs 2020-10-19 03:48:17 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`accuracy = numpy_eval()`
			`print("test set accuracy is %f" % accuracy)`
			`assert accuracy > 0.95`
Make test_mnist runnable by pytest and directly 2020-10-22 00:30:08 +08:00
clean up test 2020-10-23 21:11:38 +08:00			`class TestMNIST(unittest.TestCase):`
getting convs to work on gpu 2020-11-08 01:17:57 +08:00			`@unittest.skipUnless(GPU, "Requires GPU")`
			`def test_conv_gpu(self):`
			`np.random.seed(1337)`
			`model = TinyConvNet()`
			`[x.cuda_() for x in model.parameters()]`
			`optimizer = optim.SGD(model.parameters(), lr=0.001)`
			`train(model, optimizer, steps=1000, gpu=True)`
			`evaluate(model, gpu=True)`

wow, you have to name them test 2020-10-23 21:33:18 +08:00			`def test_conv(self):`
clean up test 2020-10-23 21:11:38 +08:00			`np.random.seed(1337)`
all three 2020-10-23 17:53:01 +08:00			`model = TinyConvNet()`
parameters, and start on efficientnet 2020-10-27 23:53:35 +08:00			`optimizer = optim.Adam(model.parameters(), lr=0.001)`
400 -> 200 2020-10-26 08:19:59 +08:00			`train(model, optimizer, steps=200)`
all three 2020-10-23 17:53:01 +08:00			`evaluate(model)`
test mnist on GPU 2020-11-01 23:46:17 +08:00
			`@unittest.skipUnless(GPU, "Requires GPU")`
			`def test_sgd_gpu(self):`
			`np.random.seed(1337)`
			`model = TinyBobNet()`
			`[x.cuda_() for x in model.parameters()]`
			`optimizer = optim.SGD(model.parameters(), lr=0.001)`
			`train(model, optimizer, steps=1000, gpu=True)`
			`evaluate(model, gpu=True)`
GPU MaxPool2D.backward(); TinyConvNet train passes (#103) * no trailing whitespace * GPU MaxPool2D.backward(); TinyConvNet train passes! * Fix GPU avgpool.forward() init_val Doesn’t change result but is simpler. * Fix MaxPool GPU init_val Tests only cover random non-negative inputs. This fixes issues if negative inputs are fed to GPU MaxPool2D. Test update to follow. 2020-11-11 23:58:43 +08:00
wow, you have to name them test 2020-10-23 21:33:18 +08:00			`def test_sgd(self):`
clean up test 2020-10-23 21:11:38 +08:00			`np.random.seed(1337)`
all three 2020-10-23 17:53:01 +08:00			`model = TinyBobNet()`
parameters, and start on efficientnet 2020-10-27 23:53:35 +08:00			`optimizer = optim.SGD(model.parameters(), lr=0.001)`
clean up test 2020-10-23 21:11:38 +08:00			`train(model, optimizer, steps=1000)`
all three 2020-10-23 17:53:01 +08:00			`evaluate(model)`
GPU MaxPool2D.backward(); TinyConvNet train passes (#103) * no trailing whitespace * GPU MaxPool2D.backward(); TinyConvNet train passes! * Fix GPU avgpool.forward() init_val Doesn’t change result but is simpler. * Fix MaxPool GPU init_val Tests only cover random non-negative inputs. This fixes issues if negative inputs are fed to GPU MaxPool2D. Test update to follow. 2020-11-11 23:58:43 +08:00
wow, you have to name them test 2020-10-23 21:33:18 +08:00			`def test_rmsprop(self):`
clean up test 2020-10-23 21:11:38 +08:00			`np.random.seed(1337)`
update rsmprop 2020-10-23 20:46:45 +08:00			`model = TinyBobNet()`
parameters, and start on efficientnet 2020-10-27 23:53:35 +08:00			`optimizer = optim.RMSprop(model.parameters(), lr=0.0002)`
clean up test 2020-10-23 21:11:38 +08:00			`train(model, optimizer, steps=1000)`
all three 2020-10-23 17:53:01 +08:00			`evaluate(model)`
incorporate changes 2020-10-22 02:21:44 +08:00
Make test_mnist runnable by pytest and directly 2020-10-22 00:30:08 +08:00			`if __name__ == '__main__':`
			`unittest.main()`