hcq no timeline signals in init (#6944)

This commit is contained in:
nimlgen 2024-10-07 23:36:19 +03:00 committed by GitHub
parent 0ecc417dd2
commit 42609300ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 10 additions and 10 deletions

View File

@ -30,10 +30,10 @@ def gfxreg(reg): return reg + 0x00001260 - amd_gpu.PACKET3_SET_SH_REG_START
def nbioreg(reg): return reg + 0x00000d20 # NBIO_BASE__INST0_SEG2
class AMDSignal(HCQSignal):
def __init__(self, value=0, alloc_event=False):
def __init__(self, value=0, is_timeline=False):
self._signal = AMDDevice.signals_pool.pop()
self._value_addr, self._timestamp_addr = mv_address(self._signal), mv_address(self._signal) + 8
if alloc_event:
if is_timeline:
sync_event = kfd.AMDKFD_IOC_CREATE_EVENT(AMDDevice.kfd, auto_reset=1)
self._event_mailbox_ptr = AMDDevice.event_page.va_addr + sync_event.event_slot_index*8
self._event_id = sync_event.event_id
@ -418,7 +418,7 @@ class AMDDevice(HCQCompiled):
self.sdma_queue = self._alloc_queue(kfd.KFD_IOC_QUEUE_TYPE_SDMA, 0x100000)
super().__init__(device, AMDAllocator(self), AMDRenderer(), AMDCompiler(self.arch), functools.partial(AMDProgram, self),
AMDSignal, AMDComputeQueue, AMDCopyQueue, (AMDSignal(alloc_event=True), AMDSignal(alloc_event=True)))
AMDSignal, AMDComputeQueue, AMDCopyQueue)
def _alloc_queue(self, queue_type, ring_size, ctx_save_restore_size=None, eop_buffer_size=None) -> AMDQueueDesc:
gart = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT, uncached=True)

View File

@ -68,7 +68,7 @@ assert ctypes.sizeof(qmd_struct_t) == 0x40 * 4
def nvmethod(subc, mthd, size, typ=2): return (typ << 28) | (size << 16) | (subc << 13) | (mthd >> 2)
class NVSignal(HCQSignal):
def __init__(self, value=0):
def __init__(self, value=0, is_timeline=False):
self._signal = NVDevice.signals_pool.pop()
self.signal_addr = mv_address(self._signal)
super().__init__(value)
@ -480,7 +480,7 @@ class NVDevice(HCQCompiled):
compiler_t = (PTXCompiler if PTX else CUDACompiler) if MOCKGPU else (NVPTXCompiler if PTX else NVCompiler)
super().__init__(device, NVAllocator(self), PTXRenderer(self.arch, device="NV") if PTX else NVRenderer(self.arch), compiler_t(self.arch),
functools.partial(NVProgram, self), NVSignal, NVComputeQueue, NVCopyQueue, timeline_signals=(NVSignal(), NVSignal()))
functools.partial(NVProgram, self), NVSignal, NVComputeQueue, NVCopyQueue)
self._setup_gpfifos()

View File

@ -32,7 +32,7 @@ class QCOMCompiler(CLCompiler):
def __init__(self, device:str=""): super().__init__(CLDevice(device), 'compile_qcom')
class QCOMSignal(HCQSignal):
def __init__(self, value=0, **kwargs):
def __init__(self, value=0, is_timeline=False):
self._signal = QCOMDevice.signals_pool.pop()
super().__init__(value)
def __del__(self): QCOMDevice.signals_pool.append(self._signal)
@ -351,7 +351,7 @@ class QCOMDevice(HCQCompiled):
if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}")
super().__init__(device, QCOMAllocator(self), QCOMRenderer(), QCOMCompiler(device), functools.partial(QCOMProgram, self),
QCOMSignal, QCOMComputeQueue, None, timeline_signals=(QCOMSignal(), QCOMSignal()))
QCOMSignal, QCOMComputeQueue, None)
def _ctx_create(self):
cr = kgsl.IOCTL_KGSL_DRAWCTXT_CREATE(self.fd, flags=(kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT |

View File

@ -195,7 +195,7 @@ class HWCopyQueue(HWCommandQueue):
def _update_copy(self, cmd_idx, dest, src): raise NotImplementedError("backend should overload this function")
class HCQSignal:
def __init__(self, value:int=0): self._set_value(value)
def __init__(self, value:int=0, is_timeline:bool=False): self._set_value(value)
@property
def value(self) -> int: return self._get_value()
@ -346,10 +346,10 @@ class HCQCompiled(Compiled):
gpu2cpu_compute_time_diff: decimal.Decimal = decimal.Decimal('nan')
def __init__(self, device:str, allocator:Allocator, renderer:Renderer, compiler:Compiler, runtime, signal_t:Type[HCQSignal],
comp_queue_t:Type[HWComputeQueue], copy_queue_t:Optional[Type[HWCopyQueue]], timeline_signals:Tuple[HCQSignal, HCQSignal]):
comp_queue_t:Type[HWComputeQueue], copy_queue_t:Optional[Type[HWCopyQueue]]):
self.signal_t, self.hw_compute_queue_t, self.hw_copy_queue_t = signal_t, comp_queue_t, copy_queue_t
self.timeline_value:int = 1
self.timeline_signal, self._shadow_timeline_signal = timeline_signals
self.timeline_signal, self._shadow_timeline_signal = self.signal_t(0, is_timeline=True), self.signal_t(0, is_timeline=True)
self.sig_prof_records:List[Tuple[HCQSignal, HCQSignal, str, bool]] = []
self.raw_prof_records:List[Tuple[decimal.Decimal, decimal.Decimal, str, bool, Optional[Dict]]] = []
self.dep_prof_records:List[Tuple[decimal.Decimal, decimal.Decimal, HCQCompiled, bool, decimal.Decimal, decimal.Decimal, HCQCompiled, bool]] = []