name: Benchmarks
on:
  push:
    branches:
      - master
      - update_benchmark

jobs:
  testmacbenchmark:
    name: Mac Benchmark
    runs-on: [self-hosted, macOS]
    defaults:
      run:
        shell: bash -o pipefail {0}
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
    - name: Checkout Code
      uses: actions/checkout@v3
    - name: Symlink models and datasets
      run: |
        ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu
        ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt
        ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz
        ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
        ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz
    - name: Run model inference benchmark
      run: python3 test/external/external_model_benchmark.py
    - name: Test speed vs torch
      run: BIG=2 MPS=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
    - name: Run Tensor Core GEMM
      run: DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
    - name: Run Stable Diffusion
      run: python3 examples/stable_diffusion.py --noshow --timing | tee sd.txt
    - name: Run LLaMA
      run: |
        JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
        JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
    - name: Run GPT2
      run: |
        JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
        JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
    - name: Run 10 CIFAR training steps
      run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
    - name: Run 10 CIFAR training steps w winograd
      run: WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt
    - uses: actions/upload-artifact@v3
      with:
        name: Speed (Mac)
        path: |
          onnx_inference_speed.csv
          torch_speed.txt
          train_cifar.txt
          train_cifar_wino.txt
          llama_unjitted.txt
          llama_jitted.txt
          gpt2_unjitted.txt
          gpt2_jitted.txt
          matmul.txt
          sd.txt

  testnvidiabenchmark:
    name: NVIDIA Benchmark
    runs-on: [self-hosted, Linux, CUDA]
    defaults:
      run:
        shell: bash -o pipefail {0}
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
    - name: Checkout Code
      uses: actions/checkout@v3
    - name: Run model inference benchmark
      run: CUDA=1 python3 test/external/external_model_benchmark.py
    - name: Test speed vs torch
      run: CUDA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
    - name: Run GPT2
      run: |
        CUDA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
        CUDA=1 JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
    - uses: actions/upload-artifact@v3
      with:
        name: Speed (NVIDIA)
        path: |
          onnx_inference_speed.csv
          torch_speed.txt
          gpt2_unjitted.txt
          gpt2_jitted.txt

  testamdbenchmark:
    name: AMD Benchmark
    runs-on: [self-hosted, Linux, ROCM]
    defaults:
      run:
        shell: bash -o pipefail {0}
    if: github.repository_owner == 'tinygrad'
    env:
      PYTHONPATH: .
    steps:
    - name: Checkout Code
      uses: actions/checkout@v3
    - name: Symlink models and datasets
      run: |
        ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt
        ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz
        ln -s ~/tinygrad/weights/LLaMA weights/LLaMA
        ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz
    - name: Run model inference benchmark
      run: python3 test/external/external_model_benchmark.py
    - name: Test speed vs torch
      run: BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt
    - name: Run Tensor Core GEMM
      run: HIP=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
    - name: Run Stable Diffusion
      run: python3 examples/stable_diffusion.py --noshow --timing | tee sd.txt
    - name: Run LLaMA
      run: |
        JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
        JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
    - name: Run GPT2 (with HIP)
      run: |
        HIP=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
        HIP=1 JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
    - name: Run 10 CIFAR training steps
      run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
    - name: Run 10 CIFAR training steps w winograd
      run: WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt
    #- name: Run 10 CIFAR training steps w WINO/HALF/HIP
    #  run: HALF=1 HIP=1 WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino_half_hip.txt
    - uses: actions/upload-artifact@v3
      with:
        name: Speed (AMD)
        path: |
          onnx_inference_speed.csv
          torch_speed.txt
          train_cifar.txt
          train_cifar_wino.txt
          train_cifar_wino_half_hip.txt
          llama_unjitted.txt
          llama_jitted.txt
          gpt2_unjitted.txt
          gpt2_jitted.txt
          matmul.txt
          sd.txt