# tinygrad/docs/abstractions2.py

# tinygrad is a tensor library, and as a tensor library it has multiple parts
# 1. a "runtime". this allows buffer management, compilation, and running programs
# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all
# 3. a "LazyBuffer" that fuses the compute into kernels, using memory only when needed
# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"


print("******** first, the runtime ***********")

# This section drives the clang runtime pieces directly: an allocator, a
# compiler, and a callable wrapper around the compiled program.
from tinygrad.runtime.ops_clang import ClangProgram, ClangCompiler, MallocAllocator

# allocate some buffers
# (size 4 — presumably bytes, matching the 4-byte values copied in below)
out = MallocAllocator.alloc(4)
a = MallocAllocator.alloc(4)
b = MallocAllocator.alloc(4)

# load in some values (little endian): 2 goes into a, 3 goes into b
MallocAllocator.copyin(a, bytearray([2,0,0,0]))
MallocAllocator.copyin(b, bytearray([3,0,0,0]))

# compile a program to a binary
lib = ClangCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")

# create a runtime for the program (ctypes.CDLL)
fxn = ClangProgram("add", lib)

# run the program
fxn(out, a, b)

# check the data out
# NOTE: the kernel stores a signed C int, so the result is read back with the
# signed "i" format code rather than the unsigned "I"
print(val := MallocAllocator.as_buffer(out).cast("i").tolist()[0])
assert val == 5


print("******** second, the Device ***********")

DEVICE = "CLANG"   # NOTE: you can change this!

import struct
from tinygrad.dtype import dtypes
from tinygrad.device import Buffer, Device
from tinygrad.ops import LazyOp, BufferOps, MemBuffer, BinaryOps
from tinygrad.shape.shapetracker import ShapeTracker

# allocate some buffers + load in values
# NOTE: the buffers are dtypes.int32, so the values are packed with the signed
# "i" format code (the unsigned "I" would reject negative inputs)
out = Buffer(DEVICE, 1, dtypes.int32).allocate()
a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("i", 2))))
b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("i", 3))))
# NOTE: a._buf is the same as the return from MallocAllocator.alloc

# describe the computation: store(buffer 0) = load(buffer 1) + load(buffer 2)
# NOTE(review): the MemBuffer indices 0/1/2 presumably line up with the order of
# [out, a, b] passed to fxn.exec below — confirm
ld_1 = LazyOp(BufferOps.LOAD, (), MemBuffer(1, dtypes.int32, ShapeTracker.from_shape((1,))))
ld_2 = LazyOp(BufferOps.LOAD, (), MemBuffer(2, dtypes.int32, ShapeTracker.from_shape((1,))))
alu = LazyOp(BinaryOps.ADD, (ld_1, ld_2))
st_0 = LazyOp(BufferOps.STORE, (alu,), MemBuffer(0, dtypes.int32, ShapeTracker.from_shape((1,))))

# convert the computation to a "linearized" format (print the format)
from tinygrad.engine.realize import get_linearizer, CompiledRunner
lin = get_linearizer(Device[DEVICE].renderer, (st_0,)).linearize()
for u in lin.uops: print(u)

# compile a program (and print the source)
fxn = CompiledRunner(lin.to_program())
print(fxn.p.src)
# NOTE: fxn.clprg is the ClangProgram

# run the program
fxn.exec([out, a, b])

# check the data out (read as signed "i" to match dtypes.int32)
assert out.as_buffer().cast('i')[0] == 5


print("******** third, the LazyBuffer ***********")

from tinygrad.lazy import LazyBuffer, MetaOps
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.schedule import create_schedule

# allocate some values + load in values
# NOTE: the buffers are dtypes.int32, so the values are packed with the signed
# "i" format code (the unsigned "I" would reject negative inputs)
a = LazyBuffer.loadop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE)
b = LazyBuffer.loadop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE)
a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("i", 2))))
b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("i", 3))))
# NOTE(review): presumably dropping srcs makes the scheduler treat a and b as
# already-filled inputs instead of scheduling the EMPTY ops — confirm
del a.srcs
del b.srcs

# describe the computation
out = a.e(BinaryOps.ADD, b)

# schedule the computation as a list of kernels
sched = create_schedule([out])
for si in sched: print(si.ast[0].op)  # NOTE: the first two convert it to CLANG

# DEBUGGING: print the compute ast as a tree
from tinygrad.engine.graph import print_tree
print_tree(sched[-1].ast[0])
# NOTE: sched[-1].ast[0] is the same as st_0 above

# run that schedule
run_schedule(sched)

# check the data out (read as signed "i" to match dtypes.int32)
assert out.realized.as_buffer().cast('i')[0] == 5


print("******** fourth, the Tensor ***********")

from tinygrad import Tensor

# build the two one-element int32 operands (2, then 3) on DEVICE and add them
a, b = (Tensor([v], dtype=dtypes.int32, device=DEVICE) for v in (2, 3))
out = a + b

# check the data out
val = out.item()
print(val)
assert val == 5
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# tinygrad is a tensor library, and as a tensor library it has multiple parts`
			`# 1. a "runtime". this allows buffer management, compilation, and running programs`
			`# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all`
			`# 3. a "LazyBuffer" that fuses the compute into kernels, using memory only when needed`
			`# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`print("******** first, the runtime ***********")`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
compiler support (#3260) * compiler support * revert that * fix tests 2024-01-27 15:36:40 +08:00			`from tinygrad.runtime.ops_clang import ClangProgram, ClangCompiler, MallocAllocator`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# allocate some buffers`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`out = MallocAllocator.alloc(4)`
			`a = MallocAllocator.alloc(4)`
			`b = MallocAllocator.alloc(4)`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# load in some values (little endian)`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00			`MallocAllocator.copyin(a, bytearray([2,0,0,0]))`
			`MallocAllocator.copyin(b, bytearray([3,0,0,0]))`

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# compile a program to a binary`
compiler support (#3260) * compiler support * revert that * fix tests 2024-01-27 15:36:40 +08:00			`lib = ClangCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00
			`# create a runtime for the program (ctypes.CDLL)`
			`fxn = ClangProgram("add", lib)`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# run the program`
			`fxn(out, a, b)`

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# check the data out`
			`print(val := MallocAllocator.as_buffer(out).cast("I").tolist()[0])`
			`assert val == 5`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`print("******** second, the Device ***********")`

			`DEVICE = "CLANG" # NOTE: you can change this!`

			`import struct`
move dtypes to dtype.py (#2964) * move dtypes to dtype.py * fix urllib 2024-01-02 06:58:48 +08:00			`from tinygrad.dtype import dtypes`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00			`from tinygrad.device import Buffer, Device`
			`from tinygrad.ops import LazyOp, BufferOps, MemBuffer, BinaryOps`
			`from tinygrad.shape.shapetracker import ShapeTracker`

			`# allocate some buffers + load in values`
Buffer isn't always allocated (#3974) * buffer alloc * allocate * missing allocates * last one 2024-03-29 04:33:47 +08:00			`out = Buffer(DEVICE, 1, dtypes.int32).allocate()`
			`a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))`
			`b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# NOTE: a._buf is the same as the return from MallocAllocator.alloc`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# describe the computation`
			`ld_1 = LazyOp(BufferOps.LOAD, (), MemBuffer(1, dtypes.int32, ShapeTracker.from_shape((1,))))`
			`ld_2 = LazyOp(BufferOps.LOAD, (), MemBuffer(2, dtypes.int32, ShapeTracker.from_shape((1,))))`
			`alu = LazyOp(BinaryOps.ADD, (ld_1, ld_2))`
			`st_0 = LazyOp(BufferOps.STORE, (alu,), MemBuffer(0, dtypes.int32, ShapeTracker.from_shape((1,))))`

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# convert the computation to a "linearized" format (print the format)`
all realize 2 (#4527) * all realize 2 * tests fixup * fix more tests * fix openpilot * fix tests * unneeded 2024-05-11 13:43:09 +08:00			`from tinygrad.engine.realize import get_linearizer, CompiledRunner`
			`lin = get_linearizer(Device[DEVICE].renderer, (st_0,)).linearize()`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`for u in lin.uops: print(u)`

			`# compile a program (and print the source)`
all realize 2 (#4527) * all realize 2 * tests fixup * fix more tests * fix openpilot * fix tests * unneeded 2024-05-11 13:43:09 +08:00			`fxn = CompiledRunner(lin.to_program())`
cleaning up search with Program (#4500) * cleaning up search * fix tests * test fix * minor compiler cleanup 2024-05-10 10:01:53 +08:00			`print(fxn.p.src)`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# NOTE: fxn.clprg is the ClangProgram`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# run the program`
			`fxn.exec([out, a, b])`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00
			`# check the data out`
remove numpy from device (#3123) * remove numpy from device * fix tests * np item * cleanups * simplify with as_buffer * no toCPU * tinygradic * cast to scalar 2024-01-15 11:36:05 +08:00			`assert out.as_buffer().cast('I')[0] == 5`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`print("******** third, the LazyBuffer ***********")`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
s/loadops/metaops [run_process_replay] (#5421) 2024-07-13 04:26:50 +08:00			`from tinygrad.lazy import LazyBuffer, MetaOps`
split to schedule.py (#3949) * split to schedule.py * split 2024-03-27 12:02:46 +08:00			`from tinygrad.engine.realize import run_schedule`
			`from tinygrad.engine.schedule import create_schedule`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# allocate some values + load in values`
s/loadops/metaops [run_process_replay] (#5421) 2024-07-13 04:26:50 +08:00			`a = LazyBuffer.loadop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE)`
			`b = LazyBuffer.loadop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE)`
create the buffer with the LazyBuffer (#3977) * create the buffer with the LazyBuffer * fixes * hack underlying buffer when we change dtype * we only care about allocated buffers * asserts 2024-03-29 10:31:28 +08:00			`a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))`
			`b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))`
move assign logic into lazy.py (#3984) * move assign logic into lazy.py * don't check the buffer 2024-03-29 11:26:38 +08:00			`del a.srcs`
			`del b.srcs`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# describe the computation`
			`out = a.e(BinaryOps.ADD, b)`

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# schedule the computation as a list of kernels`
move create schedule and delete old API (#3377) * move create schedule and delete old API * fix test multitensor 2024-02-13 01:10:45 +08:00			`sched = create_schedule([out])`
multioutput ScheduleItem (#3699) * refactor realize.py * update docs * update test_sched * update runners and devices * update openpilot and unit tests * cleanup runner lowering * update more tests 2024-03-13 23:59:38 +08:00			`for si in sched: print(si.ast[0].op) # NOTE: the first two convert it to CLANG`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00
			`# DEBUGGING: print the compute ast as a tree`
move graph/search to engine (#4596) 2024-05-15 14:12:59 +08:00			`from tinygrad.engine.graph import print_tree`
multioutput ScheduleItem (#3699) * refactor realize.py * update docs * update test_sched * update runners and devices * update openpilot and unit tests * cleanup runner lowering * update more tests 2024-03-13 23:59:38 +08:00			`print_tree(sched[-1].ast[0])`
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# NOTE: sched[-1].ast is the same as st_0 above`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
			`# run that schedule`
			`run_schedule(sched)`

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# check the data out`
remove numpy from device (#3123) * remove numpy from device * fix tests * np item * cleanups * simplify with as_buffer * no toCPU * tinygradic * cast to scalar 2024-01-15 11:36:05 +08:00			`assert out.realized.as_buffer().cast('I')[0] == 5`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00

No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`print("******** fourth, the Tensor ***********")`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`from tinygrad import Tensor`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`a = Tensor([2], dtype=dtypes.int32, device=DEVICE)`
			`b = Tensor([3], dtype=dtypes.int32, device=DEVICE)`
			`out = a + b`
refactor to remove extra kernel params (#2563) * refactor to have compiled kernel * bugfixes * docs/beautiful.py * revert that * fix tests 2023-12-02 16:32:25 +08:00
No dtype alloc (#2570) * fix all allocs * improve docs * ugh fix fake alloc 2023-12-03 05:29:40 +08:00			`# check the data out`
			`print(val:=out.item())`
			`assert val == 5`