Files
onepilot/common/file_chunker.py
Harald Schäfer 16dda06a0c Reapply chunker (#37292)
* Reapply chunker

* good size

* rm glob

* cleaner

* back to 45mb

* warp need not be fixed

* add manifest path

* lil cleaner
2026-02-23 16:49:48 -08:00

38 lines
1.3 KiB
Python

import math
import os
from pathlib import Path
CHUNK_SIZE = 45 * 1024 * 1024 # 45MB, under GitHub's 50MB limit
def get_chunk_name(name, idx, num_chunks):
return f"{name}.chunk{idx+1:02d}of{num_chunks:02d}"
def get_manifest_path(name):
return f"{name}.chunkmanifest"
def get_chunk_paths(path, file_size):
num_chunks = math.ceil(file_size / CHUNK_SIZE)
return [get_manifest_path(path)] + [get_chunk_name(path, i, num_chunks) for i in range(num_chunks)]
def chunk_file(path, targets):
manifest_path, *chunk_paths = targets
with open(path, 'rb') as f:
data = f.read()
actual_num_chunks = max(1, math.ceil(len(data) / CHUNK_SIZE))
assert len(chunk_paths) >= actual_num_chunks, f"Allowed {len(chunk_paths)} chunks but needs at least {actual_num_chunks}, for path {path}"
for i, chunk_path in enumerate(chunk_paths):
with open(chunk_path, 'wb') as f:
f.write(data[i * CHUNK_SIZE:(i + 1) * CHUNK_SIZE])
Path(manifest_path).write_text(str(len(chunk_paths)))
os.remove(path)
def read_file_chunked(path):
manifest_path = get_manifest_path(path)
if os.path.isfile(manifest_path):
num_chunks = int(Path(manifest_path).read_text().strip())
return b''.join(Path(get_chunk_name(path, i, num_chunks)).read_bytes() for i in range(num_chunks))
if os.path.isfile(path):
return Path(path).read_bytes()
raise FileNotFoundError(path)