# Mirror of https://github.com/commaai/tinygrad.git (44 lines, 2.1 KiB, Python)
import ctypes, mmap, time
|
|
from tinygrad.runtime.ops_amd import AMDDevice, kio, sdma_pkts, libc
|
|
import tinygrad.runtime.autogen.amd_sdma as amd_sdma
|
|
import tinygrad.runtime.autogen.kfd as kfd
|
|
from tinygrad.helpers import to_mv
|
|
|
|
def write_to_ring(ring_addr: int, ring_size: int, write_offset: int, src_addr: int, nbytes: int) -> int:
    """Copy ``nbytes`` from ``src_addr`` into a ring buffer, wrapping at its end.

    A single ``memmove`` at ``ring_addr + write_offset % ring_size`` runs past
    the end of the ring whenever the data starts fewer than ``nbytes`` bytes
    before the end. This helper splits the copy in two so the tail lands at
    the start of the ring instead.

    Args:
        ring_addr: Address of the first byte of the ring buffer.
        ring_size: Size of the ring buffer in bytes.
        write_offset: Ever-increasing byte offset; reduced modulo ``ring_size``.
        src_addr: Address of the bytes to copy.
        nbytes: Number of bytes to copy.

    Returns:
        ``nbytes``, so the caller can advance its write offset.

    Raises:
        ValueError: If ``nbytes`` is larger than the ring itself.
    """
    if nbytes > ring_size:
        raise ValueError(f"cannot write {nbytes} bytes into a {ring_size}-byte ring")
    start = write_offset % ring_size
    head = min(nbytes, ring_size - start)
    ctypes.memmove(ring_addr + start, src_addr, head)
    if head < nbytes:
        # The data straddles the end of the ring: the remainder goes to the front.
        ctypes.memmove(ring_addr, src_addr + head, nbytes - head)
    return nbytes


if __name__ == "__main__":
    # Stress loop: repeatedly submit one timestamp packet to an SDMA queue and
    # print the value the packet wrote back.
    dev = AMDDevice()

    # 4 MiB buffer used as the queue's ring, plus one page whose bytes at
    # +0x100 / +0x108 hold the queue's write / read pointers.
    sdma_ring = dev._gpu_alloc(1 << 22, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True)
    gart = dev._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)
    sdma_queue = kio.create_queue(
        AMDDevice.kfd,
        ring_base_address=sdma_ring.va_addr,
        ring_size=sdma_ring.size,
        gpu_id=dev.gpu_id,
        queue_type=kfd.KFD_IOC_QUEUE_TYPE_SDMA,
        queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE,
        queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY,
        write_pointer_address=gart.va_addr + 0x100,
        read_pointer_address=gart.va_addr + 0x108,
    )

    # Map 8 KiB of the kfd file starting at the 4 KiB-aligned offset that
    # contains this queue's doorbell.
    doorbells_base = sdma_queue.doorbell_offset & (~0xfff)
    doorbells = libc.mmap(0, 8192, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED, AMDDevice.kfd, doorbells_base)

    # 64-bit views of the read/write pointers and a 32-bit view of the doorbell.
    sdma_read_pointer = to_mv(sdma_queue.read_pointer_address, 8).cast("Q")
    sdma_write_pointer = to_mv(sdma_queue.write_pointer_address, 8).cast("Q")
    sdma_doorbell = to_mv(doorbells + sdma_queue.doorbell_offset - doorbells_base, 4).cast("I")

    # Page the timestamp packet targets (its ``addr`` field), and the packet itself.
    test_write_page = dev._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, uncached=True)
    cmd = sdma_pkts.timestamp(op=amd_sdma.SDMA_OP_TIMESTAMP, sub_op=amd_sdma.SDMA_SUBOP_TIMESTAMP_GET_GLOBAL,
                              addr=test_write_page.va_addr)
    # Built once: the view reads the underlying memory afresh on every index.
    timestamp_view = to_mv(test_write_page.va_addr, 0x1000).cast("Q")

    # Total bytes submitted so far; used as the write pointer and doorbell value.
    # NOTE(review): the doorbell view is 32-bit ("I"), so assigning a value above
    # 2**32 - 1 would raise; not reachable in a short run, but confirm if this
    # loop is ever left running for a long time.
    sdma_doorbell_value = 0

    def blit_sdma_command(cmd):
        """Copy ``cmd`` into the ring at the current write offset; return its size in bytes."""
        return write_to_ring(sdma_ring.va_addr, sdma_ring.size, sdma_doorbell_value,
                             ctypes.addressof(cmd), ctypes.sizeof(cmd))

    while True:
        sdma_doorbell_value += blit_sdma_command(cmd)
        sdma_write_pointer[0] = sdma_doorbell_value
        sdma_doorbell[0] = sdma_doorbell_value
        # Spin until the read pointer has caught up with the write pointer.
        while sdma_read_pointer[0] != sdma_write_pointer[0]:
            pass
        # Scale factor 1e8: presumably a 100 MHz counter -- TODO confirm.
        tm = timestamp_view[0] / 1e8
        print(f"{tm:.3f} s @ 0x{sdma_ring.va_addr + (sdma_doorbell_value % sdma_ring.size):X} R:0x{sdma_queue.read_pointer_address:X} W:0x{sdma_queue.write_pointer_address:X}")
        time.sleep(0.01)
|
|
|
|
|
|
|
|
|