vectorization

2022-10-29 12:47:52 -07:00 · 2022-10-29 12:47:52 -07:00 · 52bfbc31be
parent e473d35f90
commit 52bfbc31be
2 changed files with 9 additions and 3 deletions
--- a/accel/llvm/ops_llvm.py
+++ b/accel/llvm/ops_llvm.py
@@ -80,13 +80,17 @@ def idx_deref(builder, buf, ptr, eidx):
  else:
    return builder.load(builder.gep(ptr, [idx]))
 # https://blog.christianperone.com/2022/09/tutorial-on-using-llvm-to-jit-pytorch-fx-graphs-to-native-code-x86-arm-risc-v-wasm-part-i-scalars/
 class LLVM:
  target_machine = None
  engine = None
  optimizer = None
  # if it can't vectorize
-  # OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_sum_full
+  # OPT=2 DEBUG=3 LLVM=1 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_mul
  # if can't vectorize anything
  # looks like we have two options, either use clang or handle vectorization in tinygrad
  # for the sake of the GPU, we should probably do in tinygrad
  def __init__(self):
    if LLVM.engine is not None:
      return
@@ -96,10 +100,12 @@ class LLVM:
    target = llvm.Target.from_default_triple()
    LLVM.optimizer = llvm.ModulePassManager()
    #llvm.set_option('', '--debug-only=loop-vectorize')
    # does this do anything?
    builder = llvm.PassManagerBuilder()
    builder.opt_level = 3
-    builder.loop_vectorize = True
+    builder.loop_vectorize = True    # this changes loop-vectorize debug output
    builder.populate(LLVM.optimizer)
    LLVM.target_machine = target.create_target_machine(opt=3)  # this opt actually can change things
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -60,7 +60,7 @@ class TestOps(unittest.TestCase):
  def test_sub(self):
    helper_test_op([(45,65), (45,65)], lambda x,y: x-y, Tensor.sub)
  def test_mul(self):
-    helper_test_op([(45,65), (45,65)], lambda x,y: x*y, Tensor.mul)
+    helper_test_op([(64,64), (64,64)], lambda x,y: x*y, Tensor.mul)
  def test_div(self):
    helper_test_op([(45,65), (45,65)], lambda x,y: x/y, Tensor.div)
  def test_div_const(self):