diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index ab3ebe27..08887967 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -22,7 +22,7 @@ jobs: - name: Symlink models and datasets run: | mkdir -p weights - ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu + ln -s ~/tinygrad/extra/disassemblers/applegpu extra/disassemblers/applegpu ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz ln -s ~/tinygrad/weights/LLaMA weights/LLaMA diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5064f106..18a01d08 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -167,17 +167,17 @@ jobs: - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot model compile and size run: | - DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py + DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py #python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000' - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot model correctness (float32) - run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py + run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot alt model correctness (float32) - run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx + run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx - if: ${{ matrix.task == 'openpilot' }} name: Test openpilot fastvits model correctness (float32) - run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx + run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx - if: ${{ matrix.task == 'onnx' }} name: Test ONNX (GPU) run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 diff --git a/.gitignore b/.gitignore index a897885d..e4b26b40 100644 --- a/.gitignore +++ b/.gitignore @@ -18,9 +18,8 @@ pandecode.dump vertex.bin recognize* .idea -disassemblers/applegpu -disassemblers/cuda_ioctl_sniffer *.prof +extra/disassemblers/applegpu extra/datasets/cifar-10-python.tar.gz extra/datasets/librispeech/ extra/datasets/imagenet/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f254c7c7..a1bc8fcb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: hooks: - id: whitespace name: strip whitespace - entry: ./strip_whitespace.sh + entry: find tinygrad -type f -name "*.py" -exec sed -i '' 's/ *$//' '{}' ';' language: system always_run: true pass_filenames: false diff --git a/LICENSE b/LICENSE index 7b2f1639..d42cf56f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2023 George Hotz +Copyright (c) 2024, the tiny corp Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index 98fbde37..d3e74900 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@
- - tiny corp logo + tiny corp logo tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) and [karpathy/micrograd](https://github.com/karpathy/micrograd). Maintained by [tiny corp](https://tinygrad.org). diff --git a/openpilot/compile2.py b/examples/openpilot/compile2.py similarity index 100% rename from openpilot/compile2.py rename to examples/openpilot/compile2.py diff --git a/examples/openpilot/go.sh b/examples/openpilot/go.sh new file mode 100755 index 00000000..dbd17a5e --- /dev/null +++ b/examples/openpilot/go.sh @@ -0,0 +1,2 @@ +#!/bin/bash +NOLOCALS=1 FLOAT16=1 DEBUGCL=1 IMAGE=2 GPU=1 python3 examples/openpilot/compile2.py diff --git a/disassemblers/adreno/.gitignore b/extra/disassemblers/adreno/.gitignore similarity index 100% rename from disassemblers/adreno/.gitignore rename to extra/disassemblers/adreno/.gitignore diff --git a/disassemblers/adreno/README b/extra/disassemblers/adreno/README similarity index 100% rename from disassemblers/adreno/README rename to extra/disassemblers/adreno/README diff --git a/disassemblers/adreno/__init__.py b/extra/disassemblers/adreno/__init__.py similarity index 100% rename from disassemblers/adreno/__init__.py rename to extra/disassemblers/adreno/__init__.py diff --git a/disassemblers/adreno/disasm-a3xx.c b/extra/disassemblers/adreno/disasm-a3xx.c similarity index 100% rename from disassemblers/adreno/disasm-a3xx.c rename to extra/disassemblers/adreno/disasm-a3xx.c diff --git a/disassemblers/adreno/instr-a3xx.h b/extra/disassemblers/adreno/instr-a3xx.h similarity index 100% rename from disassemblers/adreno/instr-a3xx.h rename to extra/disassemblers/adreno/instr-a3xx.h diff --git a/disassemblers/adreno/ir3.h b/extra/disassemblers/adreno/ir3.h similarity index 100% rename from disassemblers/adreno/ir3.h rename to extra/disassemblers/adreno/ir3.h diff --git a/disassemblers/adreno/shader_enums.h b/extra/disassemblers/adreno/shader_enums.h similarity index 100% rename from disassemblers/adreno/shader_enums.h rename to extra/disassemblers/adreno/shader_enums.h diff --git a/disassemblers/adreno/util/bitscan.h b/extra/disassemblers/adreno/util/bitscan.h similarity index 100% rename from disassemblers/adreno/util/bitscan.h rename to extra/disassemblers/adreno/util/bitscan.h diff --git a/disassemblers/adreno/util/bitset.h b/extra/disassemblers/adreno/util/bitset.h similarity index 100% rename from disassemblers/adreno/util/bitset.h rename to extra/disassemblers/adreno/util/bitset.h diff --git a/disassemblers/adreno/util/list.h b/extra/disassemblers/adreno/util/list.h similarity index 100% rename from disassemblers/adreno/util/list.h rename to extra/disassemblers/adreno/util/list.h diff --git a/disassemblers/adreno/util/macros.h b/extra/disassemblers/adreno/util/macros.h similarity index 100% rename from disassemblers/adreno/util/macros.h rename to extra/disassemblers/adreno/util/macros.h diff --git a/extra/optimization/generate_dataset.sh b/extra/optimization/generate_dataset.sh index cd4b275c..dde903ba 100755 --- a/extra/optimization/generate_dataset.sh +++ b/extra/optimization/generate_dataset.sh @@ -17,7 +17,7 @@ python3 examples/beautiful_mnist.py python3 examples/beautiful_cartpole.py python3 examples/mlperf/model_spec.py python3 examples/yolov8.py ./test/models/efficientnet/Chicken.jpg -openpilot/go.sh +examples/openpilot/go.sh BIG=1 MPS=1 pytest test/ --ignore=test/test_fusion_op.py --ignore=test/test_linearizer_failures.py # sort and uniq diff --git a/extra/qcom_gpu_driver/opencl_ioctl.py b/extra/qcom_gpu_driver/opencl_ioctl.py index 32255f22..26f32bba 100644 --- a/extra/qcom_gpu_driver/opencl_ioctl.py +++ b/extra/qcom_gpu_driver/opencl_ioctl.py @@ -72,7 +72,7 @@ def parse_cmd_buf(dat): num_unit = vals[0]>>22 print(f"{num_unit=} {state_block=} {state_src=} {state_type=} {dst_off=}") - from disassemblers.adreno import disasm_raw + from extra.disassemblers.adreno import disasm_raw if state_type == ST6_SHADER: disasm_raw(get_mem(((vals[2] << 32) | vals[1]), 0x180)) if state_type == ST6_CONSTANTS: hexdump(get_mem(((vals[2] << 32) | vals[1]), min(0x180, num_unit*4))) pass diff --git a/openpilot/go.sh b/openpilot/go.sh deleted file mode 100755 index d99c706e..00000000 --- a/openpilot/go.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -NOLOCALS=1 FLOAT16=1 DEBUGCL=1 IMAGE=2 GPU=1 python3 openpilot/compile2.py diff --git a/push_pypi.sh b/push_pypi.sh deleted file mode 100755 index bf0f4030..00000000 --- a/push_pypi.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -e -rm -rf dist -ipython3 setup.py sdist bdist_wheel -twine upload dist/* - diff --git a/ruff.toml b/ruff.toml index 9a97914b..b0a7913a 100644 --- a/ruff.toml +++ b/ruff.toml @@ -26,12 +26,10 @@ lint.select = [ line-length = 150 exclude = [ - "disassemblers/", "docs/", "docs-legacy/", "examples/", "extra/", - "openpilot/", "tinygrad/runtime/autogen", "test/external/mlperf_resnet", "test/external/mlperf_unet3d", diff --git a/run_multibackend.sh b/run_multibackend.sh deleted file mode 100755 index e419a0f4..00000000 --- a/run_multibackend.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -e -echo "********* CPU *********" -CPU=1 python3 "$@" -echo "********* GPU *********" -GPU=1 python3 "$@" -echo "********* METAL *********" -METAL=1 python3 "$@" -echo "********* CLANG *********" -CLANG=1 python3 "$@" -echo "********* LLVM *********" -LLVM=1 python3 "$@" -echo "********* TORCH *********" -TORCH=1 python3 "$@" diff --git a/strip_whitespace.sh b/strip_whitespace.sh deleted file mode 100755 index 0fae2c97..00000000 --- a/strip_whitespace.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -find tinygrad -type f -name "*.py" -exec sed -i '' 's/ *$//' '{}' ';' diff --git a/test/external/external_multi_gpu.py b/test/external/external_multi_gpu.py index 4721ac84..00c02b41 100644 --- a/test/external/external_multi_gpu.py +++ b/test/external/external_multi_gpu.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# cd disassemblers/ && git clone --recursive github.com:geohot/cuda_ioctl_sniffer.git -# LD_PRELOAD=$PWD/disassemblers/cuda_ioctl_sniffer/out/sniff.so GPU=1 python3 test/external/external_multi_gpu.py +# cd extra/disassemblers/ && git clone --recursive github.com:geohot/cuda_ioctl_sniffer.git +# LD_PRELOAD=$PWD/extra/disassemblers/cuda_ioctl_sniffer/out/sniff.so GPU=1 python3 test/external/external_multi_gpu.py import numpy as np from tinygrad.tensor import Tensor from tinygrad.helpers import colored, Timing, getenv diff --git a/test/test_schedule.py b/test/test_schedule.py index bdfa32c8..74aefad7 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -790,7 +790,7 @@ class TestSchedule(unittest.TestCase): a = Tensor.rand(3, 4, 5).realize() out = a.log2().pad(((0, 1), (0, 1), (0, 1)), 1.0).sum().contiguous() run_schedule(check_schedule(out, 2)) - np.testing.assert_allclose(out.numpy(), np.pad(np.log2(a.numpy()), ((0, 1), (0, 1), (0, 1)), constant_values=1.0).sum()) + np.testing.assert_allclose(out.numpy(), np.pad(np.log2(a.numpy()), ((0, 1), (0, 1), (0, 1)), constant_values=1.0).sum(), rtol=1e-6) def test_shrink_pad_safe(self): a = Tensor.ones((3, )).contiguous().realize() diff --git a/tinygrad/runtime/ops_metal.py b/tinygrad/runtime/ops_metal.py index e690941b..c7702b86 100644 --- a/tinygrad/runtime/ops_metal.py +++ b/tinygrad/runtime/ops_metal.py @@ -33,7 +33,7 @@ class MetalProgram: with tempfile.NamedTemporaryFile(delete=True) as shader: shader.write(lib) shader.flush() - os.system(f"cd {pathlib.Path(__file__).parents[2]}/disassemblers/applegpu && python3 compiler_explorer.py {shader.name}") + os.system(f"cd {pathlib.Path(__file__).parents[2]}/extra/disassemblers/applegpu && python3 compiler_explorer.py {shader.name}") assert lib[:4] == b"MTLB", "Invalid Metal library. Could be due to using conda. Try system python or METAL_XCODE=1 DISABLE_COMPILER_CACHE=1." data = libdispatch.dispatch_data_create(lib, len(lib), None, None) self.library = unwrap2(self.device.device.newLibraryWithData_error_(data, None))