db in wal mode (#5388)

This commit is contained in:
wozeparrot 2024-07-13 03:43:36 +00:00 committed by GitHub
parent 414aa6ee98
commit 90f0e2fc49
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 49 additions and 43 deletions

View File

@ -42,11 +42,13 @@ def preprocess(img, new=False):
def _infer(model: EfficientNet, img, bs=1):
old_training = Tensor.training
Tensor.training = False
img = preprocess(img)
# run the net
if bs > 1: img = img.repeat(bs, axis=0)
out = model.forward(Tensor(img))
Tensor.training = old_training
return _LABELS[np.argmax(out.numpy()[0])]
chicken_img = Image.open(pathlib.Path(__file__).parent / 'efficientnet/Chicken.jpg')

View File

@ -48,6 +48,7 @@ class TestConv(unittest.TestCase):
w = Tensor.rand(1,1,3,3)
out = x.conv2d(w, padding=(1,1))
np.testing.assert_allclose(out.relu().numpy(), np.maximum(out.numpy(), 0))
Tensor.no_grad = False
def test_two_binops_no_rerun(self):
Tensor.no_grad = True

View File

@ -416,6 +416,7 @@ class TestSchedule(unittest.TestCase):
check_schedule(x, 3)
def test_resnet_block(self):
old_training = Tensor.training
Tensor.training = False
in_planes, planes = 64, 64
@ -429,6 +430,7 @@ class TestSchedule(unittest.TestCase):
out = bn2(conv2(out))
out = (out + x).relu()
check_schedule(out, 2, [conv1.weight, conv2.weight])
Tensor.training = old_training
def test_contiguous_while_contiguous(self):
x = Tensor.empty(1, 64, 32, 32)

View File

@ -49,7 +49,7 @@ class TestRawDiskBuffer(unittest.TestCase):
with Timing("copy in ", lambda et_ns: f" {test_size/et_ns:.2f} GB/s"):
f.readinto(tst)
def test_bitcasts_on_disk(self):
tmp = tempfile.mktemp()
_, tmp = tempfile.mkstemp()
# ground truth = https://evanw.github.io/float-toy/
t = Tensor.empty((128, 128), dtype=dtypes.uint8, device=f"disk:{tmp}") # uint8
# all zeroes
@ -94,15 +94,15 @@ class TestSafetensors(unittest.TestCase):
"weight3": torch.arange(0, 17, dtype=torch.int32).reshape(17,1,1),
"weight4": torch.arange(0, 2, dtype=torch.uint8),
}
save_file(tensors, temp("model.safetensors"))
save_file(tensors, temp("real.safetensors"))
ret = safe_load(temp("model.safetensors"))
ret = safe_load(temp("real.safetensors"))
for k,v in tensors.items(): np.testing.assert_array_equal(ret[k].numpy(), v.numpy())
safe_save(ret, temp("model.safetensors_alt"))
with open(temp("model.safetensors"), "rb") as f:
with open(temp("model.safetensors_alt"), "rb") as g:
safe_save(ret, temp("real.safetensors_alt"))
with open(temp("real.safetensors"), "rb") as f:
with open(temp("real.safetensors_alt"), "rb") as g:
assert f.read() == g.read()
ret2 = safe_load(temp("model.safetensors_alt"))
ret2 = safe_load(temp("real.safetensors_alt"))
for k,v in tensors.items(): np.testing.assert_array_equal(ret2[k].numpy(), v.numpy())
def test_real_safetensors_open(self):
@ -178,9 +178,9 @@ class TestSafetensors(unittest.TestCase):
"weight_I16": torch.tensor([127, 64], dtype=torch.short),
"weight_BF16": torch.randn((2, 2), dtype=torch.bfloat16),
}
save_file(tensors, temp("model.safetensors"))
save_file(tensors, temp("dtypes.safetensors"))
loaded = safe_load(temp("model.safetensors"))
loaded = safe_load(temp("dtypes.safetensors"))
for k,v in loaded.items():
if v.dtype != dtypes.bfloat16:
assert v.numpy().dtype == tensors[k].numpy().dtype
@ -192,9 +192,9 @@ class TestSafetensors(unittest.TestCase):
"weight_U32": np.array([1, 2, 3], dtype=np.uint32),
"weight_U64": np.array([1, 2, 3], dtype=np.uint64),
}
np_save_file(tensors, temp("model.safetensors"))
np_save_file(tensors, temp("dtypes.safetensors"))
loaded = safe_load(temp("model.safetensors"))
loaded = safe_load(temp("dtypes.safetensors"))
for k,v in loaded.items():
assert v.numpy().dtype == tensors[k].dtype
np.testing.assert_allclose(v.numpy(), tensors[k])
@ -210,65 +210,65 @@ def helper_test_disk_tensor(fn, data, np_fxn, tinygrad_fxn=None):
class TestDiskTensor(unittest.TestCase):
def test_empty(self):
pathlib.Path(temp("dt1")).unlink(missing_ok=True)
Tensor.empty(100, 100, device=f"disk:{temp('dt1')}")
pathlib.Path(temp("dt_empty")).unlink(missing_ok=True)
Tensor.empty(100, 100, device=f"disk:{temp('dt_empty')}")
def test_simple_read(self):
fn = pathlib.Path(temp("dt1"))
fn = pathlib.Path(temp("dt_simple_read"))
fn.unlink(missing_ok=True)
fn.write_bytes(bytes(range(256)))
t = Tensor.empty(16, 16, device=f"disk:{temp('dt1')}", dtype=dtypes.uint8)
t = Tensor.empty(16, 16, device=f"disk:{temp('dt_simple_read')}", dtype=dtypes.uint8)
out = t[1].to(Device.DEFAULT).tolist()
assert out == list(range(16, 32))
def test_simple_read_bitcast(self):
fn = pathlib.Path(temp("dt1"))
fn = pathlib.Path(temp("dt_simple_read_bitcast"))
fn.unlink(missing_ok=True)
fn.write_bytes(bytes(range(256))*2)
t = Tensor.empty(16, 16*2, device=f"disk:{temp('dt1')}", dtype=dtypes.uint8)
t = Tensor.empty(16, 16*2, device=f"disk:{temp('dt_simple_read_bitcast')}", dtype=dtypes.uint8)
out = t[1].bitcast(dtypes.uint16).to(Device.DEFAULT).tolist()
tout = [(x//256, x%256) for x in out]
assert tout == list([(x+1,x) for x in range(32,64,2)])
def test_simple_read_bitcast_alt(self):
fn = pathlib.Path(temp("dt1"))
fn = pathlib.Path(temp("dt_simple_read_bitcast_alt"))
fn.unlink(missing_ok=True)
fn.write_bytes(bytes(range(256))*2)
t = Tensor.empty(16, 16*2, device=f"disk:{temp('dt1')}", dtype=dtypes.uint8)
t = Tensor.empty(16, 16*2, device=f"disk:{temp('dt_simple_read_bitcast_alt')}", dtype=dtypes.uint8)
out = t.bitcast(dtypes.uint16)[1].to(Device.DEFAULT).tolist()
tout = [(x//256, x%256) for x in out]
assert tout == list([(x+1,x) for x in range(32,64,2)])
def test_write_ones(self):
pathlib.Path(temp("dt2")).unlink(missing_ok=True)
pathlib.Path(temp("dt_write_ones")).unlink(missing_ok=True)
out = Tensor.ones(10, 10, device="CLANG").contiguous()
outdisk = out.to(f"disk:{temp('dt2')}")
outdisk = out.to(f"disk:{temp('dt_write_ones')}")
print(outdisk)
outdisk.realize()
del out, outdisk
import struct
# test file
with open(temp("dt2"), "rb") as f:
with open(temp("dt_write_ones"), "rb") as f:
assert f.read() == struct.pack('<f', 1.0) * 100 == b"\x00\x00\x80\x3F" * 100
# test load alt
reloaded = Tensor.empty(10, 10, device=f"disk:{temp('dt2')}")
reloaded = Tensor.empty(10, 10, device=f"disk:{temp('dt_write_ones')}")
np.testing.assert_almost_equal(reloaded.numpy(), np.ones((10, 10)))
def test_assign_slice(self):
def assign(x,s,y): x[s] = y
helper_test_disk_tensor("dt3", [0,1,2,3], lambda x: assign(x, slice(0,2), [13, 12]))
helper_test_disk_tensor("dt4", [[0,1,2,3],[4,5,6,7]], lambda x: assign(x, slice(0,1), [[13, 12, 11, 10]]))
helper_test_disk_tensor("dt_assign_slice_1", [0,1,2,3], lambda x: assign(x, slice(0,2), [13, 12]))
helper_test_disk_tensor("dt_assign_slice_2", [[0,1,2,3],[4,5,6,7]], lambda x: assign(x, slice(0,1), [[13, 12, 11, 10]]))
def test_reshape(self):
helper_test_disk_tensor("dt5", [1,2,3,4,5], lambda x: x.reshape((1,5)))
helper_test_disk_tensor("dt6", [1,2,3,4], lambda x: x.reshape((2,2)))
helper_test_disk_tensor("dt_reshape_1", [1,2,3,4,5], lambda x: x.reshape((1,5)))
helper_test_disk_tensor("dt_reshape_2", [1,2,3,4], lambda x: x.reshape((2,2)))
def test_assign_to_different_dtype(self):
# NOTE: this is similar to Y_train in fetch_cifar
t = Tensor.empty(10, device=f'disk:{temp("dt7")}', dtype=dtypes.int64)
t = Tensor.empty(10, device=f'disk:{temp("dt_assign_to_different_dtype")}', dtype=dtypes.int64)
for i in range(5):
data = np.array([3, 3])
@ -278,58 +278,58 @@ class TestDiskTensor(unittest.TestCase):
np.testing.assert_array_equal(t.numpy(), np.array([3] * 10))
def test_bitcast(self):
with open(temp('range_1020'), "wb") as f: f.write(bytes(range(10,20)))
t = Tensor.empty(5, dtype=dtypes.int16, device=f"disk:{temp('range_1020')}")
with open(temp('dt_bitcast'), "wb") as f: f.write(bytes(range(10,20)))
t = Tensor.empty(5, dtype=dtypes.int16, device=f"disk:{temp('dt_bitcast')}")
ret = t.to("CLANG").bitcast(dtypes.uint16) + 1
assert ret.tolist() == [2827, 3341, 3855, 4369, 4883]
def test_bitcast_view(self):
with open(temp('range_1020'), "wb") as f: f.write(bytes(range(10, 24)))
t = Tensor.empty(3, dtype=dtypes.uint, device=f"disk:{temp('range_1020')}").shrink([(0, 2)])
with open(temp('dt_bitcast_view'), "wb") as f: f.write(bytes(range(10, 24)))
t = Tensor.empty(3, dtype=dtypes.uint, device=f"disk:{temp('dt_bitcast_view')}").shrink([(0, 2)])
ret = t.bitcast(dtypes.uint16).to("CLANG") + 1
assert ret.tolist() == [2827, 3341, 3855, 4369]
def test_bf16_disk_write_read(self):
t = Tensor([10000, -1, -1000, -10000, 20], dtype=dtypes.float32)
t.to(f"disk:{temp('f32')}").realize()
t.to(f"disk:{temp('dt_bf16_disk_write_read_f32')}").realize()
# hack to "cast" f32 -> bf16
with open(temp('f32'), "rb") as f: dat = f.read()
with open(temp('dt_bf16_disk_write_read_f32'), "rb") as f: dat = f.read()
adat = b''.join([dat[i+2:i+4] for i in range(0, len(dat), 4)])
with open(temp('bf16'), "wb") as f: f.write(adat)
with open(temp('dt_bf16_disk_write_read_bf16'), "wb") as f: f.write(adat)
t = Tensor.empty(5, dtype=dtypes.bfloat16, device=f"disk:{temp('bf16')}")
t = Tensor.empty(5, dtype=dtypes.bfloat16, device=f"disk:{temp('dt_bf16_disk_write_read_bf16')}")
ct = t.llvm_bf16_cast(dtypes.float)
assert ct.numpy().tolist() == [9984., -1, -1000, -9984, 20]
def test_copy_from_disk(self):
fn = pathlib.Path(temp("shco1"))
fn = pathlib.Path(temp("dt_copy_from_disk"))
fn.unlink(missing_ok=True)
fn.write_bytes(bytes(range(256))*1024)
t = Tensor.empty(256*1024, device=f"disk:{temp('shco1')}", dtype=dtypes.uint8)
t = Tensor.empty(256*1024, device=f"disk:{temp('dt_copy_from_disk')}", dtype=dtypes.uint8)
on_dev = t.to(Device.DEFAULT).realize()
np.testing.assert_equal(on_dev.numpy(), t.numpy())
def test_copy_from_disk_offset(self):
fn = pathlib.Path(temp("shco2"))
fn = pathlib.Path(temp("dt_copy_from_disk_offset"))
fn.unlink(missing_ok=True)
fn.write_bytes(bytes(range(256))*1024)
for off in [314, 991, 2048, 4096]:
t = Tensor.empty(256*1024, device=f"disk:{temp('shco2')}", dtype=dtypes.uint8)[off:]
t = Tensor.empty(256*1024, device=f"disk:{temp('dt_copy_from_disk_offset')}", dtype=dtypes.uint8)[off:]
on_dev = t.to(Device.DEFAULT).realize()
np.testing.assert_equal(on_dev.numpy(), t.numpy())
def test_copy_from_disk_huge(self):
if CI and not hasattr(Device["DISK"], 'io_uring'): self.skipTest("slow on ci without iouring")
fn = pathlib.Path(temp("shco3"))
fn = pathlib.Path(temp("dt_copy_from_disk_huge"))
fn.unlink(missing_ok=True)
fn.write_bytes(bytes(range(256))*1024*256)
for off in [0, 551]:
t = Tensor.empty(256*1024*256, device=f"disk:{temp('shco3')}", dtype=dtypes.uint8)[off:]
t = Tensor.empty(256*1024*256, device=f"disk:{temp('dt_copy_from_disk_huge')}", dtype=dtypes.uint8)[off:]
on_dev = t.to(Device.DEFAULT).realize()
np.testing.assert_equal(on_dev.numpy(), t.numpy())

View File

@ -197,7 +197,8 @@ def db_connection():
global _db_connection
if _db_connection is None:
os.makedirs(CACHEDB.rsplit(os.sep, 1)[0], exist_ok=True)
_db_connection = sqlite3.connect(CACHEDB)
_db_connection = sqlite3.connect(CACHEDB, timeout=30)
_db_connection.execute("PRAGMA journal_mode=WAL")
if DEBUG >= 7: _db_connection.set_trace_callback(print)
return _db_connection