use threefry in stable diffusion benchmark (#4988)

Also updated the default number of steps to 10, which makes it easier to tell that the image is following the prompt.
2024-06-15 20:25:29 -04:00 · 2024-06-15 20:25:29 -04:00 · 44dfa37c70
parent 20b50d8d64
commit 44dfa37c70
3 changed files with 4 additions and 4 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -38,7 +38,7 @@ jobs:
    # if: github.event.inputs.run_process_replay == 'true' || contains(github.event.head_commit.message, '[run_process_replay]') || contains(github.event.pull_request.title, '[run_process_replay]')
    # run: echo "RUN_PROCESS_REPLAY=1" >> $GITHUB_ENV
    - name: Run Stable Diffusion
-      run: JIT=2 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
+      run: JIT=2 THREEFRY=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
    - name: Run model inference benchmark
      run: METAL=1 python3 test/external/external_model_benchmark.py
    - name: Test speed vs torch
@@ -155,7 +155,7 @@ jobs:
    - name: Fuzz Padded Tensor Core GEMM(PTX)
      run: CUDA=1 PTX=1 M_START=12 M_STOP=20 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 python3 ./extra/gemm/fuzz_matmul.py
    - name: Run Stable Diffusion
-      run: CUDA=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
+      run: CUDA=1 THREEFRY=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
    - name: Run LLaMA
      run: |
        CUDA=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
@@ -306,7 +306,7 @@ jobs:
    #- name: Fuzz Padded Tensor Core GEMM
    #  run: HSA=1 M_START=12 M_STOP=20 M_STEP=1 N_START=12 N_STOP=20 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 DEBUG=2 python3 ./extra/gemm/fuzz_matmul.py
    - name: Run Stable Diffusion
-      run: AMD=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
+      run: AMD=1 THREEFRY=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
    - name: Run LLaMA 7B
      run: |
        AMD=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
--- a/examples/stable_diffusion.py
+++ b/examples/stable_diffusion.py
@@ -576,7 +576,7 @@ class StableDiffusion:
 if __name__ == "__main__":
  default_prompt = "a horse sized cat eating a bagel"
  parser = argparse.ArgumentParser(description='Run Stable Diffusion', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-  parser.add_argument('--steps', type=int, default=5, help="Number of steps in diffusion")
+  parser.add_argument('--steps', type=int, default=10, help="Number of steps in diffusion")
  parser.add_argument('--prompt', type=str, default=default_prompt, help="Phrase to render")
  parser.add_argument('--out', type=str, default=Path(tempfile.gettempdir()) / "rendered.png", help="Output filename")
  parser.add_argument('--noshow', action='store_true', help="Don't show the image")
--- a/examples/stable_diffusion_seed0.png
+++ b/examples/stable_diffusion_seed0.png