# tinygrad/extra/hip_gpu_driver/test_sdma_fun.py

# 44 lines
# 2.1 KiB
# Python

import ctypes, mmap, time
from tinygrad.runtime.ops_amd import AMDDevice, kio, sdma_pkts, libc
import tinygrad.runtime.autogen.amd_sdma as amd_sdma
import tinygrad.runtime.autogen.kfd as kfd
from tinygrad.helpers import to_mv
if __name__ == "__main__":
  # Manual SDMA smoke test: create a raw SDMA queue through KFD, then submit a
  # timestamp packet in an endless loop and print the value it writes back.
  dev = AMDDevice()

  # 4 MiB (1 << 22 bytes) command ring, plus one page that holds the queue's
  # write pointer (offset 0x100) and read pointer (offset 0x108).
  sdma_ring = dev._gpu_alloc(1 << 22, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True)
  gart = dev._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
  sdma_queue = kio.create_queue(AMDDevice.kfd,
    ring_base_address=sdma_ring.va_addr, ring_size=sdma_ring.size, gpu_id=dev.gpu_id,
    queue_type=kfd.KFD_IOC_QUEUE_TYPE_SDMA, queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE, queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY,
    write_pointer_address=gart.va_addr + 0x100, read_pointer_address=gart.va_addr + 0x108)

  # mmap offsets must be page aligned: map from the 4 KiB-aligned base and add
  # the in-page remainder back when building the doorbell view below.
  doorbells_base = sdma_queue.doorbell_offset & (~0xfff)
  doorbells = libc.mmap(0, 8192, mmap.PROT_READ|mmap.PROT_WRITE, mmap.MAP_SHARED, AMDDevice.kfd, doorbells_base)

  # CPU-side views: 64-bit read/write pointers and a 32-bit doorbell register.
  sdma_read_pointer = to_mv(sdma_queue.read_pointer_address, 8).cast("Q")
  sdma_write_pointer = to_mv(sdma_queue.write_pointer_address, 8).cast("Q")
  sdma_doorbell = to_mv(doorbells + sdma_queue.doorbell_offset - doorbells_base, 4).cast("I")

  # Page handed to the timestamp packet as its `addr`; read back after each submit.
  test_write_page = dev._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True)
  cmd = sdma_pkts.timestamp(op=amd_sdma.SDMA_OP_TIMESTAMP, sub_op=amd_sdma.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL, addr=test_write_page.va_addr)

  # Monotonically increasing byte count of everything submitted so far; the
  # position inside the ring is this value modulo the ring size.
  sdma_doorbell_value = 0

  def blit_sdma_command(cmd):
    """Copy the ctypes packet `cmd` into the ring at the current write offset.

    Returns the packet size in bytes so the caller can advance the write
    pointer. A packet that would run past the end of the ring is split: the
    head fills the remaining tail of the ring and the rest continues at the
    ring start, so no byte is ever written outside the ring allocation.
    """
    sz = ctypes.sizeof(cmd)
    src = ctypes.addressof(cmd)
    offset = sdma_doorbell_value % sdma_ring.size
    head = min(sz, sdma_ring.size - offset)
    ctypes.memmove(sdma_ring.va_addr + offset, src, head)
    if head < sz:
      # NOTE(review): assumes the engine fetches the ring with wraparound, so a
      # packet may straddle the end; if not, pad the tail with NOPs instead.
      ctypes.memmove(sdma_ring.va_addr, src + head, sz - head)
    return sz

  while True:
    # Queue one packet, publish the new write pointer, then ring the doorbell.
    sdma_doorbell_value += blit_sdma_command(cmd)
    sdma_write_pointer[0] = sdma_doorbell_value
    sdma_doorbell[0] = sdma_doorbell_value

    # Spin until the read pointer catches up with the write pointer.
    # NOTE(review): no timeout, so this hangs if the queue never advances.
    while sdma_read_pointer[0] != sdma_write_pointer[0]: continue

    # NOTE(review): dividing by 1e8 presumes a 100 MHz counter - TODO confirm.
    tm = to_mv(test_write_page.va_addr, 0x1000).cast("Q")[0]/1e8
    print(f"{tm:.3f} s @ 0x{sdma_ring.va_addr + (sdma_doorbell_value % sdma_ring.size):X} R:0x{sdma_queue.read_pointer_address:X} W:0x{sdma_queue.write_pointer_address:X}")
    time.sleep(0.01)