use numba to double conv speed

George Hotz 2020-10-21 09:34:16 -07:00
parent dc325af392
commit a68ead09c0
2 changed files with 33 additions and 24 deletions
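A hypothetical micro-benchmark sketch of where the speedup comes from (not part of this commit; names, shapes, and the measured ratio are illustrative, and nopython support for these operations depends on the installed numba version): numba compiles the nested Python loops of the convolution to machine code, so the same loop nest runs much faster after the first (compiling) call.

import time
import numpy as np
from numba import jit

def conv_forward(x, w):
  # same valid-convolution loop nest as the diff below, in plain Python/numpy
  cout, cin, H, W = w.shape
  ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
  for j in range(H):
    for i in range(W):
      tw = w[:, :, j, i]
      for Y in range(ret.shape[2]):
        for X in range(ret.shape[3]):
          ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
  return ret

conv_forward_jit = jit(nopython=True)(conv_forward)  # same function, compiled by numba

x = np.random.randn(128, 1, 28, 28).astype(np.float32)  # made-up batch of MNIST-sized images
w = np.random.randn(8, 1, 3, 3).astype(np.float32)      # made-up 3x3 conv weights

conv_forward_jit(x, w)  # warm-up call: triggers the one-time JIT compilation

t0 = time.time(); conv_forward(x, w);     print("python loops:", time.time() - t0)
t0 = time.time(); conv_forward_jit(x, w); print("numba jit:   ", time.time() - t0)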

View File

@@ -73,7 +73,7 @@ for i in (t := trange(steps)):
 # evaluate
 def numpy_eval():
-  Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28))))
+  Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32)))
   Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
   return (Y_test == Y_test_preds).mean()
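The eval change casts the MNIST inputs to float32 before wrapping them in a Tensor; MNIST pixels load as uint8, and once the inner loops are jitted with fixed dtypes the input presumably needs to match the network's float32 weights. A hypothetical illustration of the cast (stand-in data, not part of the commit):

import numpy as np

X_test = np.random.randint(0, 256, size=(10000, 28, 28), dtype=np.uint8)  # stand-in for the real MNIST test set
x = X_test.reshape((-1, 28*28)).astype(np.float32)  # uint8 -> float32 so the dtype matches the float32 weights
print(x.dtype, x.shape)  # float32 (10000, 784)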

View File

@@ -1,6 +1,7 @@
 # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
 from functools import partialmethod
 import numpy as np
+from numba import jit, float32

 # **** start with two base classes ****
@@ -170,35 +171,43 @@ class LogSoftmax(Function):
     return grad_output - np.exp(output)*grad_output.sum(axis=1).reshape((-1, 1))
 register('logsoftmax', LogSoftmax)

+@jit(nopython=True)
+def conv2d_inner_forward(x, w):
+  cout,cin,H,W = w.shape
+  ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
+  for j in range(H):
+    for i in range(W):
+      tw = w[:, :, j, i]
+      for Y in range(ret.shape[2]):
+        for X in range(ret.shape[3]):
+          ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
+  return ret
+
+@jit(nopython=True)
+def conv2d_inner_backward(grad_output, x, w):
+  dx = np.zeros_like(x)
+  dw = np.zeros_like(w)
+  cout,cin,H,W = w.shape
+  for j in range(H):
+    for i in range(W):
+      tw = w[:, :, j, i]
+      for Y in range(grad_output.shape[2]):
+        for X in range(grad_output.shape[3]):
+          gg = grad_output[:, :, Y, X]
+          tx = x[:, :, Y+j, X+i]
+          dx[:, :, Y+j, X+i] += gg.dot(tw)
+          dw[:, :, j, i] += gg.T.dot(tx)
+  return dx, dw
+
 class Conv2D(Function):
   @staticmethod
   def forward(ctx, x, w):
     ctx.save_for_backward(x, w)
-    cout,cin,H,W = w.shape
-    ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
-    for j in range(H):
-      for i in range(W):
-        tw = w[:, :, j, i]
-        for Y in range(ret.shape[2]):
-          for X in range(ret.shape[3]):
-            ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
-    return ret
+    return conv2d_inner_forward(x, w)

   @staticmethod
   def backward(ctx, grad_output):
-    x, w = ctx.saved_tensors
-    dx = np.zeros_like(x)
-    dw = np.zeros_like(w)
-    cout,cin,H,W = w.shape
-    for j in range(H):
-      for i in range(W):
-        tw = w[:, :, j, i]
-        for Y in range(grad_output.shape[2]):
-          for X in range(grad_output.shape[3]):
-            gg = grad_output[:, :, Y, X]
-            tx = x[:, :, Y+j, X+i]
-            dx[:, :, Y+j, X+i] += gg.dot(tw)
-            dw[:, :, j, i] += gg.T.dot(tx)
-    return dx, dw
+    return conv2d_inner_backward(grad_output, *ctx.saved_tensors)
 register('conv2d', Conv2D)
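A hypothetical usage sketch of the new jitted helper (not part of the commit; it assumes conv2d_inner_forward from the file above is in scope, since the file path is not shown in this view). The output spatial size shrinks by H-1 and W-1 because the convolution is unpadded, and the first call also pays the one-time JIT compilation cost:

import numpy as np

x = np.random.randn(4, 3, 28, 28).astype(np.float32)  # (batch, cin, height, width), made-up data
w = np.random.randn(8, 3, 5, 5).astype(np.float32)    # (cout, cin, H, W) filter weights, made-up data

out = conv2d_inner_forward(x, w)  # first call triggers numba compilation
print(out.shape)  # (4, 8, 24, 24): 28-(5-1) = 24 in each spatial dim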