amd better comments for ENABLE_SGPR_DISPATCH_PTR (#5768)

* amd better comments for ENABLE_SGPR_DISPATCH_PTR

* fix lkinter
This commit is contained in:
nimlgen 2024-07-28 16:23:38 +03:00 committed by GitHub
parent 95dda8dadf
commit 73fda023d3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 7 additions and 5 deletions

View File

@ -105,7 +105,7 @@ class AMDComputeQueue(HWComputeQueue):
self._acquire_mem(gli=0, gl2=0)
user_regs = [*data64_le(kernargs)]
if prg.kernel_code_properties & 0x2:
if prg.enable_dispatch_ptr:
dp = hsa.hsa_kernel_dispatch_packet_t.from_address(dp_addr:=kernargs + prg.kernargs_segment_size)
dp.workgroup_size_x, dp.workgroup_size_y, dp.workgroup_size_z = local_size[0], local_size[1], local_size[2]
dp.grid_size_x, dp.grid_size_y, dp.grid_size_z = global_size[0]*local_size[0], global_size[1]*local_size[1], global_size[2]*local_size[2]
@ -277,12 +277,14 @@ class AMDProgram(HCQProgram):
self.rsrc1 = code.compute_pgm_rsrc1
self.rsrc2 = code.compute_pgm_rsrc2 | (lds_size << 15)
self.kernel_code_properties = code.kernel_code_properties
self.prog_addr = self.lib_gpu.va_addr + entry_point + code.kernel_code_entry_byte_offset
# If required, allocate space for the dispatch packet in the kernargs to pass it to the GPU.
args_alloc_sz = self.kernargs_segment_size + (ctypes.sizeof(hsa.hsa_kernel_dispatch_packet_t) if self.kernel_code_properties & 0x2 else 0)
super().__init__(self.device, self.name, kernargs_alloc_size=args_alloc_sz)
# Some programs use hsa_kernel_dispatch_packet_t to read workgroup sizes during execution.
# The packet is represented as a pointer and set up in SGPRs. Space for the packet is allocated as part of the kernel arguments.
self.enable_dispatch_ptr = code.kernel_code_properties & hsa.AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_DISPATCH_PTR
additional_alloc_sz = ctypes.sizeof(hsa.hsa_kernel_dispatch_packet_t) if self.enable_dispatch_ptr else 0
super().__init__(self.device, self.name, kernargs_alloc_size=self.kernargs_segment_size+additional_alloc_sz)
def __del__(self):
if hasattr(self, 'lib_gpu'): cast(AMDDevice, self.device)._gpu_free(self.lib_gpu)