mirror of https://github.com/commaai/tinygrad.git
qcom match texture/sampler descriptors to OpenCL (#7622)
* qcom ioctl compare more regs
* bug fix
This commit is contained in:
parent
45db7d9045
commit
408c0a5e7f
|
@ -1,6 +1,7 @@
|
||||||
# type: ignore
|
# type: ignore
|
||||||
import ctypes, ctypes.util, struct, fcntl, re
|
import ctypes, ctypes.util, struct, fcntl, re
|
||||||
from hexdump import hexdump
|
from hexdump import hexdump
|
||||||
|
from copy import deepcopy
|
||||||
import pathlib, sys
|
import pathlib, sys
|
||||||
from tinygrad.helpers import to_mv, getenv
|
from tinygrad.helpers import to_mv, getenv
|
||||||
from tinygrad.runtime.autogen import adreno
|
from tinygrad.runtime.autogen import adreno
|
||||||
|
@ -91,27 +92,36 @@ def parse_cmd_buf(dat):
|
||||||
num_unit = vals[0]>>22
|
num_unit = vals[0]>>22
|
||||||
if IOCTL > 0: print(f"{num_unit=} {state_block=} {state_src=} {state_type=} {dst_off=}")
|
if IOCTL > 0: print(f"{num_unit=} {state_block=} {state_src=} {state_type=} {dst_off=}")
|
||||||
|
|
||||||
if state_block == SB6_CS_SHADER and IOCTL > 2:
|
if "LOAD_FRAGS" not in CAPTURED_STATE: CAPTURED_STATE['LOAD_FRAGS'] = []
|
||||||
|
CAPTURED_STATE['LOAD_FRAGS'].append((state_block, state_type, num_unit, dst_off))
|
||||||
|
|
||||||
|
if state_block == SB6_CS_SHADER:
|
||||||
from extra.disassemblers.adreno import disasm_raw
|
from extra.disassemblers.adreno import disasm_raw
|
||||||
if state_type == ST6_SHADER: disasm_raw(get_mem(((vals[2] << 32) | vals[1]), num_unit * 128))
|
if state_type == ST6_SHADER and IOCTL > 2:
|
||||||
if state_type == ST6_CONSTANTS: hexdump(get_mem(((vals[2] << 32) | vals[1]), min(0x180, num_unit*4)))
|
disasm_raw(get_mem(((vals[2] << 32) | vals[1]), num_unit * 128))
|
||||||
|
if state_type == ST6_CONSTANTS:
|
||||||
|
x = get_mem(((vals[2] << 32) | vals[1]), num_unit*4)
|
||||||
|
CAPTURED_STATE['constants'] = x[:]
|
||||||
|
if IOCTL > 2:
|
||||||
|
print('constants')
|
||||||
|
hexdump(x)
|
||||||
if state_type == ST6_IBO:
|
if state_type == ST6_IBO:
|
||||||
ibos_bytes = get_mem((vals[2] << 32) | vals[1], num_unit * 16 * 4)
|
ibos_bytes = get_mem((vals[2] << 32) | vals[1], num_unit * 16 * 4)
|
||||||
CAPTURED_STATE['ibos'] = ibos_bytes[:]
|
CAPTURED_STATE['ibos'] = ibos_bytes[:]
|
||||||
if IOCTL > 0:
|
if IOCTL > 1:
|
||||||
print('texture ibos')
|
print('texture ibos')
|
||||||
hexdump(ibos_bytes)
|
hexdump(ibos_bytes)
|
||||||
elif state_block == SB6_CS_TEX and IOCTL > 2:
|
elif state_block == SB6_CS_TEX:
|
||||||
if state_type == ST6_SHADER:
|
if state_type == ST6_SHADER:
|
||||||
samplers_bytes = get_mem((vals[2] << 32) | vals[1], num_unit * 4 * 4)
|
samplers_bytes = get_mem((vals[2] << 32) | vals[1], num_unit * 4 * 4)
|
||||||
CAPTURED_STATE['samplers'] = ibos_bytes[:]
|
CAPTURED_STATE['samplers'] = samplers_bytes[:]
|
||||||
if IOCTL > 0:
|
if IOCTL > 1:
|
||||||
print('texture samplers')
|
print('texture samplers')
|
||||||
hexdump(samplers_bytes)
|
hexdump(samplers_bytes)
|
||||||
if state_type == ST6_CONSTANTS:
|
if state_type == ST6_CONSTANTS:
|
||||||
descriptors_bytes = get_mem((vals[2] << 32) | vals[1], num_unit * 16 * 4)
|
descriptors_bytes = get_mem((vals[2] << 32) | vals[1], 1600)
|
||||||
CAPTURED_STATE['descriptors'] = ibos_bytes[:]
|
CAPTURED_STATE['descriptors'] = descriptors_bytes[:]
|
||||||
if IOCTL > 0:
|
if IOCTL > 1:
|
||||||
print('texture descriptors')
|
print('texture descriptors')
|
||||||
hexdump(descriptors_bytes)
|
hexdump(descriptors_bytes)
|
||||||
|
|
||||||
|
@ -200,10 +210,19 @@ install_hook(libc.ioctl, ioctl)
|
||||||
def before_launch():
|
def before_launch():
|
||||||
global CAPTURED_STATE
|
global CAPTURED_STATE
|
||||||
CAPTURED_STATE.clear()
|
CAPTURED_STATE.clear()
|
||||||
def collect_last_launch_state(): return CAPTURED_STATE
|
def collect_last_launch_state():
|
||||||
|
global CAPTURED_STATE
|
||||||
|
return deepcopy(CAPTURED_STATE)
|
||||||
def compare_launch_state(state, good_state):
|
def compare_launch_state(state, good_state):
|
||||||
cmp = [
|
cmp = [
|
||||||
(adreno.REG_A6XX_SP_CS_CONFIG, 0xffffffff),
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_NTEX__MASK),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_NSAMP__MASK),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_NIBO__MASK),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_ENABLED),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_BINDLESS_TEX),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_BINDLESS_SAMP),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_BINDLESS_IBO),
|
||||||
|
(adreno.REG_A6XX_SP_CS_CONFIG, adreno.A6XX_SP_CS_CONFIG_BINDLESS_UBO),
|
||||||
|
|
||||||
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK),
|
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT__MASK),
|
||||||
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK),
|
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT__MASK),
|
||||||
|
@ -213,20 +232,53 @@ def compare_launch_state(state, good_state):
|
||||||
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_EARLYPREAMBLE),
|
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_EARLYPREAMBLE),
|
||||||
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_MERGEDREGS),
|
(adreno.REG_A6XX_SP_CS_CTRL_REG0, adreno.A6XX_SP_CS_CTRL_REG0_MERGEDREGS),
|
||||||
|
|
||||||
|
(adreno.REG_A6XX_SP_CS_PVT_MEM_PARAM, adreno.A6XX_SP_CS_PVT_MEM_PARAM_MEMSIZEPERITEM__MASK),
|
||||||
|
(adreno.REG_A6XX_SP_CS_PVT_MEM_PARAM, adreno.A6XX_SP_CS_PVT_MEM_PARAM_HWSTACKSIZEPERTHREAD__MASK),
|
||||||
|
|
||||||
(adreno.REG_A6XX_SP_CS_UNKNOWN_A9B1, adreno.A6XX_SP_CS_UNKNOWN_A9B1_UNK5),
|
(adreno.REG_A6XX_SP_CS_UNKNOWN_A9B1, adreno.A6XX_SP_CS_UNKNOWN_A9B1_UNK5),
|
||||||
(adreno.REG_A6XX_SP_CS_UNKNOWN_A9B1, adreno.A6XX_SP_CS_UNKNOWN_A9B1_UNK6),
|
(adreno.REG_A6XX_SP_CS_UNKNOWN_A9B1, adreno.A6XX_SP_CS_UNKNOWN_A9B1_UNK6),
|
||||||
|
|
||||||
(adreno.REG_A6XX_SP_CS_BRANCH_COND, 0xffffffff),
|
(adreno.REG_A6XX_SP_CS_BRANCH_COND, 0xffffffff),
|
||||||
|
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_0, adreno.A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM__MASK),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_0, adreno.A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX__MASK),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_0, adreno.A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY__MASK),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_0, adreno.A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ__MASK),
|
||||||
|
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_1, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_2, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_3, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_4, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_5, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_NDRANGE_6, 0xffffffff),
|
||||||
|
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_CNTL_0, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_CNTL_1, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_KERNEL_GROUP_Y, 0xffffffff),
|
||||||
|
(adreno.REG_A6XX_HLSQ_CS_KERNEL_GROUP_Z, 0xffffffff),
|
||||||
]
|
]
|
||||||
|
|
||||||
for x,m in cmp:
|
for x,m in cmp:
|
||||||
|
print(f"Field {REGS[x]}, mask: 0x{m:X} cmp: {state.get(x, 0) & m} vs {good_state.get(x, 0) & m}")
|
||||||
if state.get(x, 0) & m != good_state.get(x, 0) & m:
|
if state.get(x, 0) & m != good_state.get(x, 0) & m:
|
||||||
return False, f"Field {REGS[x]}, mask: {x:X} mismatch: {state.get(x, 0) & m} vs {good_state.get(x, 0) & m}"
|
return False, f"Field {REGS[x]}, mask: 0x{m:X} mismatch: {state.get(x, 0) & m} vs {good_state.get(x, 0) & m}"
|
||||||
|
|
||||||
for n in ['ibos', 'samplers', 'descriptors']:
|
for n in ['descriptors', 'ibos']:
|
||||||
if n not in good_state: continue
|
if n not in good_state: continue
|
||||||
mv1, mv2 = state.get(n), good_state.get(n)
|
mv1, mv2 = state.get(n), good_state.get(n)
|
||||||
if len(mv1) != len(mv2): return False, f"{n}: len mismatch"
|
|
||||||
|
if len(mv1) != len(mv2): return False, f"{n}: len mismatch {len(mv1)} != {len(mv2)}"
|
||||||
|
mv1 = memoryview(bytearray(mv1)).cast('I')
|
||||||
|
mv2 = memoryview(bytearray(mv2)).cast('I')
|
||||||
|
for i in range(len(mv2)):
|
||||||
|
if i % 8 == 5 or i % 8 == 4: continue # addresses
|
||||||
|
if mv1[i]!=mv2[i]: return False, f"{n}: content mismatch {i} {mv1[i]} {mv2[i]}"
|
||||||
|
|
||||||
|
for n in ['samplers']:
|
||||||
|
if n not in good_state: continue
|
||||||
|
mv1, mv2 = state.get(n), good_state.get(n)
|
||||||
|
if len(mv1) != len(mv2): return False, f"{n}: len mismatch {len(mv1)} != {len(mv2)}"
|
||||||
if any(mv1[i]!=mv2[i] for i in range(len(mv1))): return False, f"{n}: content mismatch"
|
if any(mv1[i]!=mv2[i] for i in range(len(mv1))): return False, f"{n}: content mismatch"
|
||||||
|
|
||||||
return True, "PASS"
|
return True, "PASS"
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -30,7 +30,7 @@ from test.helpers import is_dtype_supported
|
||||||
|
|
||||||
def on_linearizer_will_run(): pass
|
def on_linearizer_will_run(): pass
|
||||||
def on_linearizer_did_run(): pass
|
def on_linearizer_did_run(): pass
|
||||||
def compare_states(x, y): return True
|
def compare_states(x, y): return (True, "")
|
||||||
|
|
||||||
if getenv("VALIDATE_HCQ"):
|
if getenv("VALIDATE_HCQ"):
|
||||||
if Device.DEFAULT == "NV":
|
if Device.DEFAULT == "NV":
|
||||||
|
|
|
@ -151,7 +151,7 @@ class QCOMComputeQueue(HWComputeQueue):
|
||||||
|
|
||||||
if args_state.prg.tex_cnt > 0:
|
if args_state.prg.tex_cnt > 0:
|
||||||
self.cmd(adreno.CP_LOAD_STATE6_FRAG, qreg.cp_load_state6_0(state_type=adreno.ST_CONSTANTS, state_src=adreno.SS6_INDIRECT,
|
self.cmd(adreno.CP_LOAD_STATE6_FRAG, qreg.cp_load_state6_0(state_type=adreno.ST_CONSTANTS, state_src=adreno.SS6_INDIRECT,
|
||||||
state_block=adreno.SB6_CS_TEX, num_unit=args_state.prg.tex_cnt),
|
state_block=adreno.SB6_CS_TEX, num_unit=min(16, args_state.prg.tex_cnt)),
|
||||||
*data64_le(args_state.ptr + args_state.prg.tex_off))
|
*data64_le(args_state.ptr + args_state.prg.tex_off))
|
||||||
self.reg(adreno.REG_A6XX_SP_CS_TEX_CONST, *data64_le(args_state.ptr + args_state.prg.tex_off))
|
self.reg(adreno.REG_A6XX_SP_CS_TEX_CONST, *data64_le(args_state.ptr + args_state.prg.tex_off))
|
||||||
|
|
||||||
|
@ -244,14 +244,15 @@ class QCOMProgram(HCQProgram):
|
||||||
self.buf_info, self.consts_info = [], []
|
self.buf_info, self.consts_info = [], []
|
||||||
|
|
||||||
# Collect sampler info.
|
# Collect sampler info.
|
||||||
self.samp_cnt = _read_lib(image_desc_off + 0xdc)
|
self.samp_cnt = samp_cnt_in_file = _read_lib(image_desc_off + 0xdc)
|
||||||
assert self.samp_cnt <= 1, "Up to one sampler supported"
|
assert self.samp_cnt <= 1, "Up to one sampler supported"
|
||||||
if self.samp_cnt:
|
if self.samp_cnt:
|
||||||
|
self.samp_cnt += 1
|
||||||
self.samplers = [qreg.a6xx_tex_samp_0(wrap_s=(clamp_mode:=adreno.A6XX_TEX_CLAMP_TO_BORDER), wrap_t=clamp_mode, wrap_r=clamp_mode),
|
self.samplers = [qreg.a6xx_tex_samp_0(wrap_s=(clamp_mode:=adreno.A6XX_TEX_CLAMP_TO_BORDER), wrap_t=clamp_mode, wrap_r=clamp_mode),
|
||||||
qreg.a6xx_tex_samp_1(unnorm_coords=True, cubemapseamlessfiltoff=True), 0, 0]
|
qreg.a6xx_tex_samp_1(unnorm_coords=True, cubemapseamlessfiltoff=True), 0, 0, 0, 0, 0, 0]
|
||||||
|
|
||||||
# Collect kernel arguments (buffers) info.
|
# Collect kernel arguments (buffers) info.
|
||||||
bdoff = round_up(image_desc_off + 0x158 + len(self.name), 4) + 8 * self.samp_cnt
|
bdoff = round_up(image_desc_off + 0x158 + len(self.name), 4) + 8 * samp_cnt_in_file
|
||||||
while bdoff + 32 <= len(self.lib):
|
while bdoff + 32 <= len(self.lib):
|
||||||
length, _, _, offset_words, _, _, _, typ = struct.unpack("IIIIIIII", self.lib[bdoff:bdoff+32])
|
length, _, _, offset_words, _, _, _, typ = struct.unpack("IIIIIIII", self.lib[bdoff:bdoff+32])
|
||||||
if length == 0: break
|
if length == 0: break
|
||||||
|
@ -260,7 +261,7 @@ class QCOMProgram(HCQProgram):
|
||||||
|
|
||||||
# Setting correct offsets to textures/ibos.
|
# Setting correct offsets to textures/ibos.
|
||||||
self.tex_cnt, self.ibo_cnt = sum(x.type is BUFTYPE_TEX for x in self.buf_info), sum(x.type is BUFTYPE_IBO for x in self.buf_info)
|
self.tex_cnt, self.ibo_cnt = sum(x.type is BUFTYPE_TEX for x in self.buf_info), sum(x.type is BUFTYPE_IBO for x in self.buf_info)
|
||||||
self.samp_off, self.ibo_off, self.tex_off = 2048, 2048 + 0x10 * self.samp_cnt, 2048 + 0x10 * self.samp_cnt + 0x40 * self.ibo_cnt
|
self.ibo_off, self.tex_off, self.samp_off = 2048, 2048 + 0x40 * self.ibo_cnt, 2048 + 0x40 * self.tex_cnt + 0x40 * self.ibo_cnt
|
||||||
cur_ibo_off, cur_tex_off = self.ibo_off, self.tex_off
|
cur_ibo_off, cur_tex_off = self.ibo_off, self.tex_off
|
||||||
for x in self.buf_info:
|
for x in self.buf_info:
|
||||||
if x.type is BUFTYPE_IBO: x.offset, cur_ibo_off = cur_ibo_off, cur_ibo_off + 0x40
|
if x.type is BUFTYPE_IBO: x.offset, cur_ibo_off = cur_ibo_off, cur_ibo_off + 0x40
|
||||||
|
@ -304,10 +305,10 @@ class QCOMAllocator(HCQAllocator):
|
||||||
texture.pitch, texture.real_stride = pitch, real_stride
|
texture.pitch, texture.real_stride = pitch, real_stride
|
||||||
|
|
||||||
tex_fmt = adreno.FMT6_32_32_32_32_FLOAT if options.image.itemsize == 4 else adreno.FMT6_16_16_16_16_FLOAT
|
tex_fmt = adreno.FMT6_32_32_32_32_FLOAT if options.image.itemsize == 4 else adreno.FMT6_16_16_16_16_FLOAT
|
||||||
texture.desc[0] = qreg.a6xx_tex_const_0(swiz_x=0, swiz_y=1, swiz_z=2, swiz_w=3, fmt=tex_fmt)
|
texture.desc[0] = qreg.a6xx_tex_const_0(0x8, swiz_x=0, swiz_y=1, swiz_z=2, swiz_w=3, fmt=tex_fmt)
|
||||||
texture.desc[1] = qreg.a6xx_tex_const_1(width=imgw, height=imgh)
|
texture.desc[1] = qreg.a6xx_tex_const_1(width=imgw, height=imgh)
|
||||||
texture.desc[2] = qreg.a6xx_tex_const_2(type=adreno.A6XX_TEX_2D, pitch=texture.pitch, pitchalign=pitchalign-6)
|
texture.desc[2] = qreg.a6xx_tex_const_2(type=adreno.A6XX_TEX_2D, pitch=texture.pitch, pitchalign=pitchalign-6)
|
||||||
texture.desc[4:7] = [*data64_le(texture.va_addr), qreg.a6xx_tex_const_6(plane_pitch=0x400000)]
|
texture.desc[4:8] = [*data64_le(texture.va_addr), qreg.a6xx_tex_const_6(plane_pitch=0x400000), qreg.a6xx_tex_const_7(13)]
|
||||||
texture.ibo = [texture.desc[0] & (~0xffff), *texture.desc[1:len(texture.desc)]]
|
texture.ibo = [texture.desc[0] & (~0xffff), *texture.desc[1:len(texture.desc)]]
|
||||||
|
|
||||||
return texture
|
return texture
|
||||||
|
|
Loading…
Reference in New Issue