mirror of https://github.com/commaai/tinygrad.git
rocm: disassembler for shader
This commit is contained in:
parent
7fbf96b992
commit
5190037cbc
|
@ -6,3 +6,18 @@ class Timing(object):
|
|||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
self.et = time.perf_counter_ns() - self.st
|
||||
if self.enabled: print(f"{self.prefix}{self.et*1e-6:.2f} ms"+(self.on_exit(self.et) if self.on_exit else ""))
|
||||
|
||||
def enable_early_exec():
|
||||
import subprocess, multiprocessing
|
||||
qin, qout = multiprocessing.Queue(), multiprocessing.Queue()
|
||||
def _early_exec_process(qin, qout):
|
||||
while 1:
|
||||
path, inp = qin.get()
|
||||
qout.put(subprocess.check_output(path, input=inp))
|
||||
p = multiprocessing.Process(target=_early_exec_process, args=(qin, qout))
|
||||
p.daemon = True
|
||||
p.start()
|
||||
def early_exec(x):
|
||||
qin.put(x)
|
||||
return qout.get()
|
||||
return early_exec
|
||||
|
|
|
@ -72,8 +72,14 @@ static void handler(int sig, siginfo_t *si, void *unused) {
|
|||
D("HSA_PACKET_TYPE_KERNEL_DISPATCH -- setup:%d workgroup[%d, %d, %d] grid[%d, %d, %d] kernel_object:0x%lx kernarg_address:%p\n", pkt->setup, pkt->workgroup_size_x, pkt->workgroup_size_y, pkt->workgroup_size_z, pkt->grid_size_x, pkt->grid_size_y, pkt->grid_size_z, pkt->kernel_object, pkt->kernarg_address);
|
||||
amd_kernel_code_t *code = (amd_kernel_code_t *)pkt->kernel_object;
|
||||
D("kernel_code_entry_byte_offset:%lx\n", code->kernel_code_entry_byte_offset);
|
||||
hexdump((void*)(pkt->kernel_object + code->kernel_code_entry_byte_offset), 0x200);
|
||||
//hexdump((void*)pkt->kernel_object, sizeof(amd_kernel_code_t));
|
||||
uint32_t *kernel_code = (uint32_t*)(pkt->kernel_object + code->kernel_code_entry_byte_offset);
|
||||
int code_len = 0;
|
||||
while (kernel_code[code_len] != 0xbf9f0000 && kernel_code[code_len] != 0) code_len++;
|
||||
hexdump(kernel_code, code_len*4);
|
||||
/*FILE *f = fopen("/tmp/kernel_code", "wb");
|
||||
fwrite(kernel_code, 4, code_len, f);
|
||||
fclose(f);
|
||||
system("python -c 'print(\" \".join([(\"0x%02X\"%x) for x in open(\"/tmp/kernel_code\", \"rb\").read()]))' | ../build/llvm-project/bin/llvm-mc --disassemble --arch=amdgcn --mcpu=gfx1100 --show-encoding");*/
|
||||
} else if ((pkt->header&0xFF) == HSA_PACKET_TYPE_BARRIER_AND) {
|
||||
D("HSA_PACKET_TYPE_BARRIER_AND\n");
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from __future__ import annotations
|
||||
import platform
|
||||
import platform, pathlib
|
||||
import numpy as np
|
||||
import pyopencl as cl # type: ignore
|
||||
from typing import Optional, List
|
||||
|
@ -12,6 +12,11 @@ OSX = platform.system() == "Darwin"
|
|||
OSX_TIMING_RATIO = (125/3) if OSX else 1.0 # see test/external_osx_profiling.py to determine this ratio. it's in like GPU clocks or something
|
||||
FLOAT16 = getenv("FLOAT16", 0)
|
||||
|
||||
# TODO: if you fork and exit the child process after creating anything with cl on AMD, it hangs on e.wait()
|
||||
if DEBUG >= 5:
|
||||
from extra.helpers import enable_early_exec
|
||||
early_exec = enable_early_exec()
|
||||
|
||||
class _CL:
|
||||
def __init__(self):
|
||||
platforms: List[List[cl.Device]] = [y for y in ([x.get_devices(device_type=cl.device_type.GPU) for x in cl.get_platforms()] + [x.get_devices(device_type=cl.device_type.CPU) for x in cl.get_platforms()]) if len(y)]
|
||||
|
@ -54,6 +59,9 @@ class CLProgram:
|
|||
if 'Adreno' in CL.cl_ctx.devices[0].name:
|
||||
from disassemblers.adreno import disasm
|
||||
disasm(self.binary())
|
||||
elif 'gfx1100' in CL.cl_ctx.devices[0].name:
|
||||
asm = early_exec(([pathlib.Path(__file__).parent.parent.parent / "extra/rocm/build/llvm-project/bin/llvm-objdump", '-d', '-'], self.binary()))
|
||||
print('\n'.join([x for x in asm.decode('utf-8').split("\n") if 's_code_end' not in x]))
|
||||
else:
|
||||
# print the PTX for NVIDIA. TODO: probably broken for everything else
|
||||
print(self.binary().decode('utf-8'))
|
||||
|
|
Loading…
Reference in New Issue