vectorization

This commit is contained in:
George Hotz 2022-10-29 12:47:52 -07:00
parent e473d35f90
commit 52bfbc31be
2 changed files with 9 additions and 3 deletions

View File

@@ -80,13 +80,17 @@ def idx_deref(builder, buf, ptr, eidx):
else: else:
return builder.load(builder.gep(ptr, [idx])) return builder.load(builder.gep(ptr, [idx]))
# https://blog.christianperone.com/2022/09/tutorial-on-using-llvm-to-jit-pytorch-fx-graphs-to-native-code-x86-arm-risc-v-wasm-part-i-scalars/
class LLVM: class LLVM:
target_machine = None target_machine = None
engine = None engine = None
optimizer = None optimizer = None
# if it can't vectorize # if it can't vectorize
# OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_sum_full # OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_mul
# if can't vectorize anything # if can't vectorize anything
# looks like we have two options, either use clang or handle vectorization in tinygrad
# for the sake of the GPU, we should probably do in tinygrad
def __init__(self): def __init__(self):
if LLVM.engine is not None: if LLVM.engine is not None:
return return
@@ -96,10 +100,12 @@ class LLVM:
target = llvm.Target.from_default_triple() target = llvm.Target.from_default_triple()
LLVM.optimizer = llvm.ModulePassManager() LLVM.optimizer = llvm.ModulePassManager()
#llvm.set_option('', '--debug-only=loop-vectorize')
# does this do anything? # does this do anything?
builder = llvm.PassManagerBuilder() builder = llvm.PassManagerBuilder()
builder.opt_level = 3 builder.opt_level = 3
builder.loop_vectorize = True builder.loop_vectorize = True # this changes loop-vectorize debug output
builder.populate(LLVM.optimizer) builder.populate(LLVM.optimizer)
LLVM.target_machine = target.create_target_machine(opt=3) # this opt actually can change things LLVM.target_machine = target.create_target_machine(opt=3) # this opt actually can change things

View File

@@ -60,7 +60,7 @@ class TestOps(unittest.TestCase):
def test_sub(self): def test_sub(self):
helper_test_op([(45,65), (45,65)], lambda x,y: x-y, Tensor.sub) helper_test_op([(45,65), (45,65)], lambda x,y: x-y, Tensor.sub)
def test_mul(self): def test_mul(self):
helper_test_op([(45,65), (45,65)], lambda x,y: x*y, Tensor.mul) helper_test_op([(64,64), (64,64)], lambda x,y: x*y, Tensor.mul)
def test_div(self): def test_div(self):
helper_test_op([(45,65), (45,65)], lambda x,y: x/y, Tensor.div) helper_test_op([(45,65), (45,65)], lambda x,y: x/y, Tensor.div)
def test_div_const(self): def test_div_const(self):