mirror of https://github.com/commaai/tinygrad.git
amd hdp flush regs are on seg2 (#4925)
This commit is contained in:
parent
4bfd1904f6
commit
225f792330
|
@ -64,6 +64,7 @@ WAIT_REG_MEM_FUNCTION_GEQ = 5 # >=
|
|||
COMPUTE_SHADER_EN, FORCE_START_AT_000, CS_W32_EN = (1 << 0), (1 << 2), (1 << 15)
|
||||
|
||||
def gfxreg(reg): return reg + 0x00001260 - amd_gpu.PACKET3_SET_SH_REG_START
|
||||
def nbioreg(reg): return reg + 0x00000d20 # NBIO_BASE__INST0_SEG2
|
||||
def data64_le(data): return (data & 0xFFFFFFFF, data >> 32)
|
||||
|
||||
class AMDCompiler(Compiler):
|
||||
|
@ -84,9 +85,10 @@ class HWPM4Queue:
|
|||
def ptr(self) -> int: return len(self.q)
|
||||
|
||||
def hdp_flush(self):
|
||||
self.q += [amd_gpu.PACKET3(amd_gpu.PACKET3_WAIT_REG_MEM, 5),
|
||||
amd_gpu.WAIT_REG_MEM_MEM_SPACE(0) | amd_gpu.WAIT_REG_MEM_OPERATION(1) | amd_gpu.WAIT_REG_MEM_FUNCTION(WAIT_REG_MEM_FUNCTION_EQ) | \
|
||||
amd_gpu.WAIT_REG_MEM_ENGINE(0), regBIF_BX_PF1_GPU_HDP_FLUSH_REQ, regBIF_BX_PF1_GPU_HDP_FLUSH_DONE, 0x0, 0x0, 0x20]
|
||||
self.q += [amd_gpu.PACKET3(amd_gpu.PACKET3_WAIT_REG_MEM, 5), amd_gpu.WAIT_REG_MEM_MEM_SPACE(0) | amd_gpu.WAIT_REG_MEM_OPERATION(1) | \
|
||||
amd_gpu.WAIT_REG_MEM_FUNCTION(WAIT_REG_MEM_FUNCTION_EQ) | amd_gpu.WAIT_REG_MEM_ENGINE(0), nbioreg(regBIF_BX_PF1_GPU_HDP_FLUSH_REQ),
|
||||
nbioreg(regBIF_BX_PF1_GPU_HDP_FLUSH_DONE), 0xffffffff, 0xffffffff, 0x20]
|
||||
return self
|
||||
|
||||
def invalidate_cache(self, addr=0x0, sz=(1 << 64)-1, gli=1, glm=1, glk=1, glv=1, gl1=1, gl2=1):
|
||||
self.q += [amd_gpu.PACKET3(amd_gpu.PACKET3_ACQUIRE_MEM, 6), 0, #0x80000000,
|
||||
|
@ -99,8 +101,7 @@ class HWPM4Queue:
|
|||
return self
|
||||
|
||||
def exec(self, prg, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), signal=None, signal_value=0):
|
||||
self.hdp_flush()
|
||||
self.invalidate_cache()
|
||||
self.hdp_flush().invalidate_cache()
|
||||
|
||||
user_data = [*data64_le(kernargs)]
|
||||
if hasattr(prg, 'dispatch_packet_offset'):
|
||||
|
|
Loading…
Reference in New Issue