mirror of https://github.com/commaai/tinygrad.git
better cacheline test
This commit is contained in:
parent
76db1af6fc
commit
b05c2828f7
|
@ -56,7 +56,37 @@ def reg_count(nthread, ngrp, nreg):
|
|||
@register_test
|
||||
def test_reg_count(nthread=1, ngrp=1):
|
||||
base = reg_count(nthread, ngrp, 1)
|
||||
return [(nreg, (reg_count(nthread, ngrp, nreg)-base)/nreg) for nreg in trange(2, 257)] # archprobe goes to 512
|
||||
return [(nreg, (reg_count(nthread, ngrp, nreg)-base)/nreg) for nreg in trange(4, 513, 4)]
|
||||
|
||||
def buf_cacheline_size(stride):
|
||||
BUF_CACHE_SIZE = 128*1024
|
||||
NTHREAD_LOGIC = 256
|
||||
PITCH = BUF_CACHE_SIZE * 2 // NTHREAD_LOGIC
|
||||
BUF_SIZE = PITCH * NTHREAD_LOGIC
|
||||
|
||||
prg = """__kernel void buf_cacheline_size(
|
||||
__global const float* src,
|
||||
__global float* dst,
|
||||
__private const int niter,
|
||||
__private const int stride,
|
||||
__private const int pitch
|
||||
) {
|
||||
float c = 0;
|
||||
for (int i = 0; i < niter; ++i) {
|
||||
const int zero = i >> 31;
|
||||
c += src[zero + stride * 0 + pitch * get_global_id(0)];
|
||||
c += src[zero + stride * 1 + pitch * get_global_id(0)];
|
||||
}
|
||||
dst[0] = c;
|
||||
}"""
|
||||
in_buf = CLBuffer(BUF_SIZE, dtypes.float32)
|
||||
out_buf = CLBuffer(1, dtypes.float32)
|
||||
cl = CLProgram("buf_cacheline_size", prg, argdtypes=[None, None, np.int32, np.int32, np.int32])
|
||||
return min([cl([NTHREAD_LOGIC, 1, 1], [NTHREAD_LOGIC, 1, 1], in_buf, out_buf, 10, stride, PITCH, wait=True) for _ in range(5)])*1e9
|
||||
|
||||
@register_test
|
||||
def test_cacheline_size():
|
||||
return [(stride, buf_cacheline_size(stride)) for stride in trange(1,64)]
|
||||
|
||||
def buf_cache_hierarchy_pchase(ndata, stride=1, NCOMP=1, steps=65536):
|
||||
ndata //= NCOMP*4 # ptr size
|
||||
|
@ -78,10 +108,6 @@ def buf_cache_hierarchy_pchase(ndata, stride=1, NCOMP=1, steps=65536):
|
|||
cl = CLProgram("buf_cache_hierarchy_pchase", prg, argdtypes=[None, None, np.int32])
|
||||
return min([cl([1, 1, 1], [1, 1, 1], in_buf, out_buf, steps, wait=True)/steps for _ in range(5)])*1e9
|
||||
|
||||
@register_test
|
||||
def test_cacheline_size():
|
||||
return [(stride, buf_cache_hierarchy_pchase(65536, stride, steps=65536)) for stride in trange(1,64)]
|
||||
|
||||
@register_test
|
||||
def test_memory_latency():
|
||||
# requires cacheline < 16
|
||||
|
@ -125,7 +151,7 @@ def gflops(niter=4, nroll=4, ngroups=4096):
|
|||
out_buf[get_global_id(0) >> 31] = {'+'.join(f"y.s{'0123456789abcdef'[i]}" for i in range(NCOMP))};
|
||||
}}"""
|
||||
out_buf = CLBuffer(1, dtypes.float32)
|
||||
cl = CLProgram("gflops", prg)
|
||||
cl = CLProgram("gflops", prg, options="-Werror -cl-mad-enable -cl-fast-relaxed-math")
|
||||
FLOPS = NCOMP*2*2 * niter * nroll * ngroups * 32
|
||||
# NOTE: if nay of the niters form a local group, this is wrong
|
||||
return FLOPS/(min([cl([32, ngroups, 1], [32, 1, 1], out_buf, wait=True) for _ in range(10)])*1e9)
|
||||
|
@ -137,7 +163,7 @@ def test_gflops():
|
|||
if __name__ == "__main__":
|
||||
cache = {}
|
||||
#cache = pickle.load(open("/tmp/cache.pkl", "rb"))
|
||||
#tests = {"test_gflops": tests["test_gflops"]}
|
||||
#tests = {"test_cacheline_size": tests["test_cacheline_size"]}
|
||||
plt.figure(figsize=(16, 9))
|
||||
for i,(k,test) in enumerate(tests.items()):
|
||||
print(f"running {k}")
|
||||
|
|
|
@ -40,10 +40,10 @@ class CLBuffer(RawBufferCopyInOut):
|
|||
cl.enqueue_copy(CL.cl_queue, x, self._buf, is_blocking=True)
|
||||
|
||||
class CLProgram:
|
||||
def __init__(self, name:str, prg:str, binary=False, argdtypes=None):
|
||||
def __init__(self, name:str, prg:str, binary=False, argdtypes=None, options=None):
|
||||
self.name, self.argdtypes, self.clprogram = name, argdtypes, cl.Program(CL.cl_ctx, CL.cl_ctx.devices, [prg]) if binary else cl.Program(CL.cl_ctx, prg) # type: ignore
|
||||
try:
|
||||
self._clprg = self.clprogram.build()
|
||||
self._clprg = self.clprogram.build(options=options)
|
||||
except cl.RuntimeError as e:
|
||||
if DEBUG >= 3: print("FAILED TO BUILD", prg)
|
||||
raise e
|
||||
|
|
Loading…
Reference in New Issue