mirror of https://github.com/commaai/tinygrad.git
tiny profiler cleanups (#5126)
This commit is contained in:
parent
4a7d403777
commit
d84beaa6dd
|
@ -267,8 +267,8 @@ class HCQCompatAllocator(LRUAllocator): # pylint: disable=abstract-method
|
|||
self.device._wait_signal(self.device.timeline_signal, self.b_timeline[self.b_next])
|
||||
ctypes.memmove(self.b[self.b_next].va_addr, from_mv(src[i:]), lsize:=min(self.b[self.b_next].size, src.nbytes-i))
|
||||
self.device.hw_copy_queue_t().wait(self.device.timeline_signal, self.device.timeline_value - 1) \
|
||||
.copy(dest.va_addr+i, self.b[self.b_next].va_addr, lsize) \
|
||||
.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
|
||||
.copy(dest.va_addr+i, self.b[self.b_next].va_addr, lsize) \
|
||||
.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
|
||||
self.b_timeline[self.b_next] = self.device.timeline_value
|
||||
self.device.timeline_value += 1
|
||||
|
||||
|
@ -283,8 +283,8 @@ class HCQCompatAllocator(LRUAllocator): # pylint: disable=abstract-method
|
|||
with hcq_profile(self.device, self.device.hw_copy_queue_t, desc=f"DISK -> {self.device.dname}", enabled=PROFILE):
|
||||
for (batch_info, dst_off, src_off, copy_size) in src.device.allocator._copyout_sharded(src, size, _get_temp_buf, seg_len=self.b[0].size):
|
||||
self.device.hw_copy_queue_t().wait(self.device.timeline_signal, self.device.timeline_value - 1) \
|
||||
.copy(dest.va_addr + dst_off, batch_info[0] + src_off, copy_size) \
|
||||
.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
|
||||
.copy(dest.va_addr + dst_off, batch_info[0] + src_off, copy_size) \
|
||||
.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
|
||||
self.b_timeline[batch_info[1]] = self.device.timeline_value
|
||||
self.device.timeline_value += 1
|
||||
|
||||
|
@ -294,8 +294,8 @@ class HCQCompatAllocator(LRUAllocator): # pylint: disable=abstract-method
|
|||
with hcq_profile(self.device, self.device.hw_copy_queue_t, desc=f"{self.device.dname} -> CPU", enabled=PROFILE):
|
||||
for i in range(0, dest.nbytes, self.b[0].size):
|
||||
self.device.hw_copy_queue_t().wait(self.device.timeline_signal, self.device.timeline_value - 1) \
|
||||
.copy(self.b[0].va_addr, src.va_addr+i, lsize:=min(self.b[0].size, dest.nbytes-i)) \
|
||||
.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
|
||||
.copy(self.b[0].va_addr, src.va_addr+i, lsize:=min(self.b[0].size, dest.nbytes-i)) \
|
||||
.signal(self.device.timeline_signal, self.device.timeline_value).submit(self.device)
|
||||
self.device._wait_signal(self.device.timeline_signal, self.device.timeline_value)
|
||||
self.device.timeline_value += 1
|
||||
|
||||
|
@ -306,9 +306,9 @@ class HCQCompatAllocator(LRUAllocator): # pylint: disable=abstract-method
|
|||
|
||||
with hcq_profile(self.device, self.device.hw_copy_queue_t, desc=f"{src_dev.dname} -> {dest_dev.dname}", enabled=PROFILE):
|
||||
src_dev.hw_copy_queue_t().wait(src_dev.timeline_signal, src_dev.timeline_value - 1) \
|
||||
.wait(dest_dev.timeline_signal, dest_dev.timeline_value - 1) \
|
||||
.copy(dest.va_addr, src.va_addr, sz) \
|
||||
.signal(src_dev.timeline_signal, src_dev.timeline_value).submit(src_dev)
|
||||
.wait(dest_dev.timeline_signal, dest_dev.timeline_value - 1) \
|
||||
.copy(dest.va_addr, src.va_addr, sz) \
|
||||
.signal(src_dev.timeline_signal, src_dev.timeline_value).submit(src_dev)
|
||||
dest_dev.hw_compute_queue_t().wait(src_dev.timeline_signal, src_dev.timeline_value).submit(dest_dev)
|
||||
src_dev.timeline_value += 1
|
||||
|
||||
|
|
|
@ -165,8 +165,7 @@ class ProfileLogger:
|
|||
self.subactors[subactor_key] = (tid:=len(self.subactors))
|
||||
self.mjson.append({"name": "thread_name", "ph": "M", "pid": self.actors[actor_name], "tid":tid, "args": {"name": subactor_name}})
|
||||
|
||||
self.mjson.append({"name": name, "ph": "B", "pid": self.actors[actor_name], "tid": self.subactors.get(subactor_key, -1), "ts": st})
|
||||
self.mjson.append({"name": name, "ph": "E", "pid": self.actors[actor_name], "tid": self.subactors.get(subactor_key, -1), "ts": et})
|
||||
self.mjson.append({"name": name, "ph": "X", "pid": self.actors[actor_name], "tid": self.subactors.get(subactor_key, -1), "ts":st, "dur":et-st})
|
||||
|
||||
ProfileLogger.writers -= 1
|
||||
if ProfileLogger.writers == 0:
|
||||
|
|
Loading…
Reference in New Issue