2023-12-03 05:29:40 +08:00
|
|
|
# tinygrad is a tensor library, and as a tensor library it has multiple parts
|
|
|
|
# 1. a "runtime". this allows buffer management, compilation, and running programs
|
|
|
|
# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all runtimes
|
|
|
|
# 3. a "LazyBuffer" that fuses the compute into kernels, using memory only when needed
|
|
|
|
# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"
|
2023-12-02 16:32:25 +08:00
|
|
|
|
|
|
|
|
2023-12-03 05:29:40 +08:00
|
|
|
print("******** first, the runtime ***********")

from tinygrad.runtime.ops_clang import ClangProgram, ClangCompiler, MallocAllocator

# allocate some buffers (one output, two inputs)
out = MallocAllocator.alloc(4)
a = MallocAllocator.alloc(4)
b = MallocAllocator.alloc(4)

# load in some values (little endian)
MallocAllocator.copyin(a, bytearray([2,0,0,0]))
MallocAllocator.copyin(b, bytearray([3,0,0,0]))

# compile a program to a binary
lib = ClangCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")

# create a runtime for the program (ctypes.CDLL)
fxn = ClangProgram("add", lib)

# run the program
fxn(out, a, b)

# check the data out
# the kernel stores a C `int`, so view the bytes as signed ("i"), not unsigned ("I");
# otherwise a negative sum would be read back as a large positive number
val = MallocAllocator.as_buffer(out).cast("i").tolist()[0]
print(val)
assert val == 5
|
2023-12-02 16:32:25 +08:00
|
|
|
|
|
|
|
|
2023-12-03 05:29:40 +08:00
|
|
|
print("******** second, the Device ***********")

DEVICE = "CLANG"  # NOTE: you can change this!

import struct
from tinygrad.dtype import dtypes
from tinygrad.device import Buffer, Device
from tinygrad.ops import LazyOp, BufferOps, MemBuffer, BinaryOps
from tinygrad.shape.shapetracker import ShapeTracker

# allocate some buffers + load in values
# the buffers are int32 (signed), so pack with the signed format "i" to match the dtype;
# the unsigned format "I" would reject negative inputs with struct.error
out = Buffer(DEVICE, 1, dtypes.int32).allocate()
a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("i", 2))))
b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("i", 3))))
# NOTE: a._buf is the same as the return from MallocAllocator.alloc

# describe the computation
# NOTE(review): the MemBuffer indices (0, 1, 2) presumably refer to positions in the
# buffer list passed to fxn.exec below ([out, a, b]) -- confirm against MemBuffer
ld_1 = LazyOp(BufferOps.LOAD, (), MemBuffer(1, dtypes.int32, ShapeTracker.from_shape((1,))))
ld_2 = LazyOp(BufferOps.LOAD, (), MemBuffer(2, dtypes.int32, ShapeTracker.from_shape((1,))))
alu = LazyOp(BinaryOps.ADD, (ld_1, ld_2))
st_0 = LazyOp(BufferOps.STORE, (alu,), MemBuffer(0, dtypes.int32, ShapeTracker.from_shape((1,))))

# convert the computation to a "linearized" format (print the format)
lin = Device[DEVICE].get_linearizer(st_0).linearize()
for u in lin.uops: print(u)

# compile a program (and print the source)
fxn = Device[DEVICE].to_program(lin)
print(fxn.prg)
# NOTE: fxn.clprg is the ClangProgram

# run the program
fxn.exec([out, a, b])

# check the data out (read back as signed int32 to match the buffer dtype)
assert out.as_buffer().cast('i')[0] == 5
|
2023-12-02 16:32:25 +08:00
|
|
|
|
|
|
|
|
2023-12-03 05:29:40 +08:00
|
|
|
print("******** third, the LazyBuffer ***********")

from tinygrad.lazy import LazyBuffer, LoadOps
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.schedule import create_schedule

# allocate some buffers + load in values
a = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, DEVICE)
b = LazyBuffer.loadop(LoadOps.EMPTY, (1,), dtypes.int32, DEVICE)
# attach already-filled buffers; int32 is signed, so pack with "i" rather than "I"
a.realized = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("i", 2))))
b.realized = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("i", 3))))

# describe the computation
out = a.e(BinaryOps.ADD, b)

# schedule the computation as a list of kernels
sched = create_schedule([out])
for si in sched: print(si.ast[0].op)  # NOTE: the first two convert it to CLANG

# DEBUGGING: print the compute ast as a tree
from tinygrad.features.graph import print_tree
print_tree(sched[-1].ast[0])
# NOTE: sched[-1].ast[0] is the same as st_0 above

# run that schedule
run_schedule(sched)

# check the data out (read back as signed int32 to match the buffer dtype)
assert out.realized.as_buffer().cast('i')[0] == 5
|
2023-12-02 16:32:25 +08:00
|
|
|
|
|
|
|
|
2023-12-03 05:29:40 +08:00
|
|
|
print("******** fourth, the Tensor ***********")

from tinygrad import Tensor

# build two single-element int32 tensors on DEVICE and add them
a = Tensor([2], dtype=dtypes.int32, device=DEVICE)
b = Tensor([3], dtype=dtypes.int32, device=DEVICE)
out = a + b

# check the data out
val = out.item()
print(val)
assert val == 5
|