2020-12-28 05:59:12 +08:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import numpy as np
|
|
|
|
import random
|
|
|
|
|
2023-08-22 22:36:24 +08:00
|
|
|
from tinygrad.nn.state import get_parameters
|
2023-07-05 23:15:13 +08:00
|
|
|
from tinygrad.nn.optim import Adam
|
2020-12-28 07:35:56 +08:00
|
|
|
from extra.training import train, evaluate
|
2023-11-14 12:18:40 +08:00
|
|
|
from extra.models.transformer import Transformer
|
2020-12-28 07:35:56 +08:00
|
|
|
|
2023-07-03 06:06:59 +08:00
|
|
|
# dataset idea from https://github.com/karpathy/minGPT/blob/master/projects/adder/adder.py
|
2020-12-28 05:59:12 +08:00
|
|
|
def make_dataset():
    """Build the two-digit addition dataset as digit sequences.

    Every ordered pair ``(i, j)`` with ``0 <= i, j < 100`` becomes one row of
    seven digits: the tens and units of ``i``, the tens and units of ``j``,
    then the hundreds, tens and units of ``i + j``.  The 10000 rows are
    shuffled in place with the module-level ``random`` generator (seed it
    beforehand for reproducible splits) and converted to ``float32``.

    Inputs are the first six digits of each row and targets are the last six,
    i.e. the same sequence shifted by one position.

    Returns:
        ``(X_train, Y_train, X_test, Y_test)`` -- the first 8000 shuffled rows
        form the training split and the remaining 2000 the test split; every
        array has 6 columns.
    """
    rows = []
    for lhs in range(100):
        lhs_digits = divmod(lhs, 10)
        for rhs in range(100):
            # The sum is at most 198, so three digits always suffice.
            hundreds, below_hundred = divmod(lhs + rhs, 100)
            rows.append([*lhs_digits, *divmod(rhs, 10), hundreds, *divmod(below_hundred, 10)])
    random.shuffle(rows)
    data = np.array(rows).astype(np.float32)

    n_train = 8000
    inputs = data[:, 0:6]
    # Targets are copied so they do not share memory with the inputs.
    targets = np.copy(data[:, 1:])
    return inputs[:n_train], targets[:n_train], inputs[n_train:], targets[n_train:]
|
|
|
|
|
2020-12-28 07:35:56 +08:00
|
|
|
if __name__ == "__main__":
    # Script entry point: train the model on the addition dataset in ten
    # rounds and, whenever a round's test accuracy exceeds 0.998, list the
    # test examples it still gets wrong.
    # NOTE(review): the positional Transformer arguments are presumably
    # (symbols, max_len, layers, embed_dim, heads, ff_dim) -- confirm against
    # extra.models.transformer.
    model = Transformer(10, 6, 2, 128, 4, 32)
    X_train, Y_train, X_test, Y_test = make_dataset()

    lr = 0.003
    for _ in range(10):
        # A fresh Adam instance is created each round with the current rate.
        optimizer = Adam(get_parameters(model), lr=lr)
        # NOTE(review): 50 is presumably the number of training steps per
        # round -- confirm against extra.training.train.
        train(model, X_train, Y_train, optimizer, 50, BS=64, allow_jit=True)
        acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10, return_predict=True)

        # Decay the learning rate for the next round.
        lr /= 1.2
        print(f'reducing lr to {lr:.4f}')

        if acc > 0.998:
            wrong = 0
            for k, predicted in enumerate(Y_test_preds):
                # Skip rows where every predicted digit matches the target.
                if not (predicted != Y_test[k]).any():
                    continue
                wrong += 1
                a = X_test[k, :2].astype(np.int32)    # digits of the first operand
                b = X_test[k, 2:4].astype(np.int32)   # digits of the second operand
                c = Y_test[k, -3:].astype(np.int32)   # expected sum digits
                x = predicted[-3:].astype(np.int32)   # predicted sum digits
                print(f'{a[0]}{a[1]} + {b[0]}{b[1]} = {x[0]}{x[1]}{x[2]} (correct: {c[0]}{c[1]}{c[2]})')
            print(f'Wrong predictions: {wrong}, acc = {acc:.4f}')
|