mirror of https://github.com/commaai/tinygrad.git
vectorization
This commit is contained in:
parent
e473d35f90
commit
52bfbc31be
|
@ -80,13 +80,17 @@ def idx_deref(builder, buf, ptr, eidx):
|
||||||
else:
|
else:
|
||||||
return builder.load(builder.gep(ptr, [idx]))
|
return builder.load(builder.gep(ptr, [idx]))
|
||||||
|
|
||||||
|
# https://blog.christianperone.com/2022/09/tutorial-on-using-llvm-to-jit-pytorch-fx-graphs-to-native-code-x86-arm-risc-v-wasm-part-i-scalars/
|
||||||
class LLVM:
|
class LLVM:
|
||||||
target_machine = None
|
target_machine = None
|
||||||
engine = None
|
engine = None
|
||||||
optimizer = None
|
optimizer = None
|
||||||
# if it can't vectorize
|
# if it can't vectorize
|
||||||
# OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_sum_full
|
# OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_mul
|
||||||
# if can't vectorize anything
|
# if can't vectorize anything
|
||||||
|
|
||||||
|
# looks like we have two options, either use clang or handle vectorization in tinygrad
|
||||||
|
# for the sake of the GPU, we should probably do in tinygrad
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
if LLVM.engine is not None:
|
if LLVM.engine is not None:
|
||||||
return
|
return
|
||||||
|
@ -96,10 +100,12 @@ class LLVM:
|
||||||
target = llvm.Target.from_default_triple()
|
target = llvm.Target.from_default_triple()
|
||||||
LLVM.optimizer = llvm.ModulePassManager()
|
LLVM.optimizer = llvm.ModulePassManager()
|
||||||
|
|
||||||
|
#llvm.set_option('', '--debug-only=loop-vectorize')
|
||||||
|
|
||||||
# does this do anything?
|
# does this do anything?
|
||||||
builder = llvm.PassManagerBuilder()
|
builder = llvm.PassManagerBuilder()
|
||||||
builder.opt_level = 3
|
builder.opt_level = 3
|
||||||
builder.loop_vectorize = True
|
builder.loop_vectorize = True # this changes loop-vectorize debug output
|
||||||
builder.populate(LLVM.optimizer)
|
builder.populate(LLVM.optimizer)
|
||||||
|
|
||||||
LLVM.target_machine = target.create_target_machine(opt=3) # this opt actually can change things
|
LLVM.target_machine = target.create_target_machine(opt=3) # this opt actually can change things
|
||||||
|
|
|
@ -60,7 +60,7 @@ class TestOps(unittest.TestCase):
|
||||||
def test_sub(self):
|
def test_sub(self):
|
||||||
helper_test_op([(45,65), (45,65)], lambda x,y: x-y, Tensor.sub)
|
helper_test_op([(45,65), (45,65)], lambda x,y: x-y, Tensor.sub)
|
||||||
def test_mul(self):
|
def test_mul(self):
|
||||||
helper_test_op([(45,65), (45,65)], lambda x,y: x*y, Tensor.mul)
|
helper_test_op([(64,64), (64,64)], lambda x,y: x*y, Tensor.mul)
|
||||||
def test_div(self):
|
def test_div(self):
|
||||||
helper_test_op([(45,65), (45,65)], lambda x,y: x/y, Tensor.div)
|
helper_test_op([(45,65), (45,65)], lambda x,y: x/y, Tensor.div)
|
||||||
def test_div_const(self):
|
def test_div_const(self):
|
||||||
|
|
Loading…
Reference in New Issue