use numba to double conv speed

George Hotz 2020-10-21 09:34:16 -07:00
parent dc325af392
commit a68ead09c0
2 changed files with 33 additions and 24 deletions
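A hypothetical micro-benchmark sketch of where the speedup comes from (not part of this commit; names, shapes, and the measured ratio are illustrative, and nopython support for these operations depends on the installed numba version): numba compiles the nested Python loops of the convolution to machine code, so the same loop nest runs much faster after the first (compiling) call.

import time
import numpy as np
from numba import jit

def conv_forward(x, w):
  # same valid-convolution loop nest as the diff below, in plain Python/numpy
  cout, cin, H, W = w.shape
  ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
  for j in range(H):
    for i in range(W):
      tw = w[:, :, j, i]
      for Y in range(ret.shape[2]):
        for X in range(ret.shape[3]):
          ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
  return ret

conv_forward_jit = jit(nopython=True)(conv_forward)  # same function, compiled by numba

x = np.random.randn(128, 1, 28, 28).astype(np.float32)  # made-up batch of MNIST-sized images
w = np.random.randn(8, 1, 3, 3).astype(np.float32)      # made-up 3x3 conv weights

conv_forward_jit(x, w)  # warm-up call: triggers the one-time JIT compilation

t0 = time.time(); conv_forward(x, w);     print("python loops:", time.time() - t0)
t0 = time.time(); conv_forward_jit(x, w); print("numba jit:   ", time.time() - t0)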

View File

@@ -73,7 +73,7 @@ for i in (t := trange(steps)):
 # evaluate
 def numpy_eval():
-  Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28))))
+  Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32)))
   Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
   return (Y_test == Y_test_preds).mean()
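The eval change casts the MNIST inputs to float32 before wrapping them in a Tensor; MNIST pixels load as uint8, and once the inner loops are jitted with fixed dtypes the input presumably needs to match the network's float32 weights. A hypothetical illustration of the cast (stand-in data, not part of the commit):

import numpy as np

X_test = np.random.randint(0, 256, size=(10000, 28, 28), dtype=np.uint8)  # stand-in for the real MNIST test set
x = X_test.reshape((-1, 28*28)).astype(np.float32)  # uint8 -> float32 so the dtype matches the float32 weights
print(x.dtype, x.shape)  # float32 (10000, 784)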

View File

@@ -1,6 +1,7 @@
 # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
 from functools import partialmethod
 import numpy as np
+from numba import jit, float32

 # **** start with two base classes ****
@@ -170,35 +171,43 @@ class LogSoftmax(Function):
     return grad_output - np.exp(output)*grad_output.sum(axis=1).reshape((-1, 1))
 register('logsoftmax', LogSoftmax)

+@jit(nopython=True)
+def conv2d_inner_forward(x, w):
+  cout,cin,H,W = w.shape
+  ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
+  for j in range(H):
+    for i in range(W):
+      tw = w[:, :, j, i]
+      for Y in range(ret.shape[2]):
+        for X in range(ret.shape[3]):
+          ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
+  return ret
+
+@jit(nopython=True)
+def conv2d_inner_backward(grad_output, x, w):
+  dx = np.zeros_like(x)
+  dw = np.zeros_like(w)
+  cout,cin,H,W = w.shape
+  for j in range(H):
+    for i in range(W):
+      tw = w[:, :, j, i]
+      for Y in range(grad_output.shape[2]):
+        for X in range(grad_output.shape[3]):
+          gg = grad_output[:, :, Y, X]
+          tx = x[:, :, Y+j, X+i]
+          dx[:, :, Y+j, X+i] += gg.dot(tw)
+          dw[:, :, j, i] += gg.T.dot(tx)
+  return dx, dw
+
 class Conv2D(Function):
   @staticmethod
   def forward(ctx, x, w):
     ctx.save_for_backward(x, w)
-    cout,cin,H,W = w.shape
-    ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
-    for j in range(H):
-      for i in range(W):
-        tw = w[:, :, j, i]
-        for Y in range(ret.shape[2]):
-          for X in range(ret.shape[3]):
-            ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
-    return ret
+    return conv2d_inner_forward(x, w)

   @staticmethod
   def backward(ctx, grad_output):
-    x, w = ctx.saved_tensors
-    dx = np.zeros_like(x)
-    dw = np.zeros_like(w)
-    cout,cin,H,W = w.shape
-    for j in range(H):
-      for i in range(W):
-        tw = w[:, :, j, i]
-        for Y in range(grad_output.shape[2]):
-          for X in range(grad_output.shape[3]):
-            gg = grad_output[:, :, Y, X]
-            tx = x[:, :, Y+j, X+i]
-            dx[:, :, Y+j, X+i] += gg.dot(tw)
-            dw[:, :, j, i] += gg.T.dot(tx)
-    return dx, dw
+    return conv2d_inner_backward(grad_output, *ctx.saved_tensors)
 register('conv2d', Conv2D)
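A hypothetical usage sketch of the new jitted helper (not part of the commit; it assumes conv2d_inner_forward from the file above is in scope, since the file path is not shown in this view). The output spatial size shrinks by H-1 and W-1 because the convolution is unpadded, and the first call also pays the one-time JIT compilation cost:

import numpy as np

x = np.random.randn(4, 3, 28, 28).astype(np.float32)  # (batch, cin, height, width), made-up data
w = np.random.randn(8, 3, 5, 5).astype(np.float32)    # (cout, cin, H, W) filter weights, made-up data

out = conv2d_inner_forward(x, w)  # first call triggers numba compilation
print(out.shape)  # (4, 8, 24, 24): 28-(5-1) = 24 in each spatial dim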