mirror of https://github.com/commaai/tinygrad.git
parent
fb3fe6f39b
commit
b95f47784a
|
@ -522,7 +522,7 @@ class HCQCompiled(Compiled):
|
||||||
self.devices.append(self)
|
self.devices.append(self)
|
||||||
|
|
||||||
def synchronize(self):
|
def synchronize(self):
|
||||||
self.timeline_signal.wait(self.timeline_value - 1)
|
self.timeline_signal.wait(self.timeline_value - 1) if not hasattr(self, '_syncdev') else self._syncdev()
|
||||||
|
|
||||||
if self.timeline_value > (1 << 31): self._wrap_timeline_signal()
|
if self.timeline_value > (1 << 31): self._wrap_timeline_signal()
|
||||||
if PROFILE:
|
if PROFILE:
|
||||||
|
|
|
@ -101,7 +101,7 @@ class QCOMComputeQueue(HWComputeQueue):
|
||||||
def _submit(self, device):
|
def _submit(self, device):
|
||||||
if self.binded_device == device: submit_req = self.submit_req
|
if self.binded_device == device: submit_req = self.submit_req
|
||||||
else: submit_req, _ = self._build_gpu_command(device)
|
else: submit_req, _ = self._build_gpu_command(device)
|
||||||
kgsl.IOCTL_KGSL_GPU_COMMAND(device.fd, __payload=submit_req)
|
device.last_cmd = kgsl.IOCTL_KGSL_GPU_COMMAND(device.fd, __payload=submit_req).timestamp
|
||||||
|
|
||||||
def _exec(self, prg, args_state, global_size, local_size):
|
def _exec(self, prg, args_state, global_size, local_size):
|
||||||
global_size_mp = [int(g*l) for g,l in zip(global_size, local_size)]
|
global_size_mp = [int(g*l) for g,l in zip(global_size, local_size)]
|
||||||
|
@ -345,7 +345,7 @@ class QCOMDevice(HCQCompiled):
|
||||||
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000, map_to_cpu=False).va_addr
|
QCOMDevice.dummy_addr = self._gpu_alloc(0x1000, map_to_cpu=False).va_addr
|
||||||
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, map_to_cpu=True, uncached=True)
|
QCOMDevice.signals_page = self._gpu_alloc(16 * 65536, map_to_cpu=True, uncached=True)
|
||||||
QCOMDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, self.signals_page.size, 16)]
|
QCOMDevice.signals_pool = [to_mv(self.signals_page.va_addr + off, 16).cast("Q") for off in range(0, self.signals_page.size, 16)]
|
||||||
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr = self._info(), self._ctx_create(), self._gpu_alloc(0x1000000, map_to_cpu=True), 0
|
info, self.ctx, self.cmd_buf, self.cmd_buf_ptr, self.last_cmd = self._info(), self._ctx_create(), self._gpu_alloc(0x1000000, map_to_cpu=True), 0,0
|
||||||
QCOMDevice.gpu_id = ((info.chip_id >> 24) & 0xFF) * 100 + ((info.chip_id >> 16) & 0xFF) * 10 + ((info.chip_id >> 8) & 0xFF)
|
QCOMDevice.gpu_id = ((info.chip_id >> 24) & 0xFF) * 100 + ((info.chip_id >> 16) & 0xFF) * 10 + ((info.chip_id >> 8) & 0xFF)
|
||||||
if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}")
|
if QCOMDevice.gpu_id >= 700: raise RuntimeError(f"Unsupported GPU: {QCOMDevice.gpu_id}")
|
||||||
|
|
||||||
|
@ -398,3 +398,5 @@ class QCOMDevice(HCQCompiled):
|
||||||
self.synchronize()
|
self.synchronize()
|
||||||
self._gpu_free(self._stack)
|
self._gpu_free(self._stack)
|
||||||
self._stack = self._gpu_alloc(sz)
|
self._stack = self._gpu_alloc(sz)
|
||||||
|
|
||||||
|
def _syncdev(self): kgsl.IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID(self.fd, context_id=self.ctx, timestamp=self.last_cmd, timeout=0xffffffff)
|
||||||
|
|
Loading…
Reference in New Issue