mirror of https://github.com/commaai/openpilot.git
200 lines
6.3 KiB
Python
200 lines
6.3 KiB
Python
# pylint: skip-file
|
|
|
|
import os
|
|
import time
|
|
import tempfile
|
|
import threading
|
|
import urllib.parse
|
|
import pycurl
|
|
from hashlib import sha256
|
|
from io import BytesIO
|
|
from tenacity import retry, wait_random_exponential, stop_after_attempt
|
|
from common.file_helpers import mkdirs_exists_ok, atomic_write_in_dir
|
|
# Size in bytes of one cached chunk (1000 * 1000).
K = 1000
CHUNK_SIZE = K * 1000

# Directory used for cache files; the COMMA_CACHE environment variable
# overrides the default location.
CACHE_DIR = os.environ.get("COMMA_CACHE", "/tmp/comma_download_cache/")
|
|
|
|
|
|
def hash_256(link):
    """Return the hex SHA-256 digest of *link* with its query string removed.

    Everything from the first ``?`` onward is dropped before hashing, so two
    links that differ only in their query string produce the same digest.
    """
    base, _, _ = link.partition("?")
    return sha256(base.encode("utf-8")).hexdigest()
|
|
|
|
|
|
class URLFile:
    """Read-only, seekable, file-like view of a remote URL fetched with pycurl.

    Reads either go straight to the network (``_force_download``) or pass
    through a chunk cache stored in ``CACHE_DIR``: the file is split into
    ``CHUNK_SIZE``-byte chunks, each saved in its own cache file.

    A single ``pycurl.Curl`` handle is kept per thread (in ``_tlocal``) and is
    shared by every instance created on that thread.
    """

    _tlocal = threading.local()

    def __init__(self, url, debug=False, cache=None):
        """Create a reader for *url*.

        Args:
            url: Remote URL to read.
            debug: When true, print download diagnostics and enable curl's
                verbose output.
            cache: ``True``/``False`` forces the chunk cache on/off. ``None``
                defers to the ``FILEREADER_CACHE`` environment variable.
        """
        self._url = url
        self._pos = 0
        self._length = None
        self._local_file = None
        self._debug = debug
        # Downloads are forced (cache disabled) by default; a non-zero
        # FILEREADER_CACHE enables the cache, and the `cache` argument, when
        # given, overrides the environment.
        self._force_download = not int(os.environ.get("FILEREADER_CACHE", "0"))
        if cache is not None:
            self._force_download = not cache

        # Reuse this thread's curl handle if one exists, otherwise create it.
        try:
            self._curl = self._tlocal.curl
        except AttributeError:
            self._curl = self._tlocal.curl = pycurl.Curl()
        mkdirs_exists_ok(CACHE_DIR)

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close and delete the temporary local copy created by ``name``, if any."""
        if self._local_file is not None:
            # Drop the reference first so a failure below cannot leave a
            # half-cleaned file attached to the instance.
            local_file, self._local_file = self._local_file, None
            try:
                # Close before removing: deleting an open file fails on some
                # platforms.
                local_file.close()
            finally:
                os.remove(local_file.name)

    @retry(wait=wait_random_exponential(multiplier=1, max=5), stop=stop_after_attempt(3), reraise=True)
    def get_length_online(self):
        """Ask the server for the file length with a body-less request.

        Returns:
            The value curl reports for ``CONTENT_LENGTH_DOWNLOAD`` as an int
            (libcurl documents -1 for "unknown").

        Up to three attempts are made, with randomized exponential back-off;
        the last error is re-raised.
        """
        c = self._curl
        c.reset()
        c.setopt(pycurl.NOSIGNAL, 1)
        c.setopt(pycurl.TIMEOUT_MS, 500000)
        c.setopt(pycurl.FOLLOWLOCATION, True)
        c.setopt(pycurl.URL, self._url)
        c.setopt(c.NOBODY, 1)
        c.perform()
        length = int(c.getinfo(c.CONTENT_LENGTH_DOWNLOAD))
        # Leave the shared handle clean for the next request.
        c.reset()
        return length

    def get_length(self):
        """Return the remote file length, using memory and disk caches when allowed.

        The length is memoized on the instance. When the chunk cache is
        enabled it is also read from / written to a ``<hash>_length`` file in
        ``CACHE_DIR``.
        """
        if self._length is not None:
            return self._length

        file_length_path = os.path.join(CACHE_DIR, hash_256(self._url) + "_length")
        if os.path.exists(file_length_path) and not self._force_download:
            with open(file_length_path) as file_length:
                content = file_length.read()
                self._length = int(content)
                return self._length

        self._length = self.get_length_online()
        if not self._force_download:
            with atomic_write_in_dir(file_length_path, mode="w") as file_length:
                file_length.write(str(self._length))
        return self._length

    def read(self, ll=None):
        """Read up to *ll* bytes from the current position (to the end if ``None``).

        With the cache disabled this is a direct ranged download. Otherwise
        the requested range is assembled from ``CHUNK_SIZE``-aligned chunks,
        each loaded from ``CACHE_DIR`` or downloaded and stored there first.

        Returns:
            The bytes read. The position is advanced to the end of the
            requested range.
        """
        if self._force_download:
            return self.read_aux(ll=ll)

        file_begin = self._pos
        if ll is not None:
            file_end = self._pos + ll
        else:
            file_end = self.get_length()
        assert file_end != -1, f"Remote file is empty or doesn't exist: {self._url}"

        # We have to align with the chunks we store. Position is the beginning
        # of the latest chunk that starts before or at our file position.
        position = (file_begin // CHUNK_SIZE) * CHUNK_SIZE
        url_hash = hash_256(self._url)
        pieces = []
        while True:
            self._pos = position
            # NOTE: true division is kept on purpose; it renders as e.g. "3.0"
            # in the file name, which keeps existing cache entries valid.
            chunk_number = self._pos / CHUNK_SIZE
            file_name = url_hash + "_" + str(chunk_number)
            full_path = os.path.join(CACHE_DIR, str(file_name))

            if not os.path.exists(full_path):
                # If we don't have the chunk on disk, download and store it.
                data = self.read_aux(ll=CHUNK_SIZE)
                with atomic_write_in_dir(full_path, mode="wb") as new_cached_file:
                    new_cached_file.write(data)
            else:
                with open(full_path, "rb") as cached_file:
                    data = cached_file.read()

            # Keep only the part of this chunk that lies inside the request.
            pieces.append(data[max(0, file_begin - position): min(CHUNK_SIZE, file_end - position)])

            position += CHUNK_SIZE
            if position >= file_end:
                self._pos = file_end
                return b"".join(pieces)

    @retry(wait=wait_random_exponential(multiplier=1, max=5), stop=stop_after_attempt(3), reraise=True)
    def read_aux(self, ll=None):
        """Download up to *ll* bytes from the current position, bypassing the cache.

        A ``Range`` header is sent unless the whole file is requested from
        position 0.

        Returns:
            The downloaded bytes (``b""`` when nothing is left to read). The
            position is advanced by the number of bytes returned.

        Raises:
            Exception: On HTTP 416, on a ranged request not answered with 206,
                or on a full request not answered with 200.

        Up to three attempts are made, with randomized exponential back-off;
        the last error is re-raised.
        """
        download_range = False
        headers = ["Connection: keep-alive"]
        if self._pos != 0 or ll is not None:
            # `end` is the inclusive index of the last byte to request.
            if ll is None:
                end = self.get_length() - 1
            else:
                end = min(self._pos + ll, self.get_length()) - 1
            # Nothing left only when the position is strictly past `end`;
            # position == end still has exactly one byte to fetch.
            if self._pos > end:
                return b""
            headers.append(f"Range: bytes={self._pos}-{end}")
            download_range = True

        dats = BytesIO()
        c = self._curl
        c.setopt(pycurl.URL, self._url)
        c.setopt(pycurl.WRITEDATA, dats)
        c.setopt(pycurl.NOSIGNAL, 1)
        c.setopt(pycurl.TIMEOUT_MS, 500000)
        c.setopt(pycurl.HTTPHEADER, headers)
        c.setopt(pycurl.FOLLOWLOCATION, True)

        if self._debug:
            print("downloading", self._url)

            def header(x):
                # Print only response header lines that contain "MISS".
                if b'MISS' in x:
                    print(x.strip())

            c.setopt(pycurl.HEADERFUNCTION, header)

            def test(debug_type, debug_msg):
                print(" debug(%d): %s" % (debug_type, debug_msg.strip()))

            c.setopt(pycurl.VERBOSE, 1)
            c.setopt(pycurl.DEBUGFUNCTION, test)
            t1 = time.time()

        c.perform()

        if self._debug:
            t2 = time.time()
            if t2 - t1 > 0.1:
                # ".0f" (whole seconds); a bare ".f" is not a valid format spec.
                print(f"get {self._url} {headers!r} {t2 - t1:.0f} slow")

        response_code = c.getinfo(pycurl.RESPONSE_CODE)
        if response_code == 416:  # Requested Range Not Satisfiable
            raise Exception(f"Error, range out of bounds {response_code} {headers} ({self._url}): {repr(dats.getvalue())[:500]}")
        if download_range and response_code != 206:  # Partial Content
            raise Exception(f"Error, requested range but got unexpected response {response_code} {headers} ({self._url}): {repr(dats.getvalue())[:500]}")
        if (not download_range) and response_code != 200:  # OK
            raise Exception(f"Error {response_code} {headers} ({self._url}): {repr(dats.getvalue())[:500]}")

        ret = dats.getvalue()
        self._pos += len(ret)
        return ret

    def seek(self, pos):
        """Set the absolute read position to *pos*."""
        self._pos = pos

    @property
    def name(self):
        """Returns a local path to file with the URLFile's contents.

        This can be used to interface with modules that require local files.

        On first access the remaining contents (from the current position)
        are written to a temporary file, and this instance's ``read`` and
        ``seek`` are rebound to that file's methods.
        """
        if self._local_file is None:
            # Keep the URL path's extension on the temporary file.
            _, ext = os.path.splitext(urllib.parse.urlparse(self._url).path)
            local_fd, local_path = tempfile.mkstemp(suffix=ext)
            try:
                os.write(local_fd, self.read())
                local_file = open(local_path, "rb")
            except Exception:
                os.remove(local_path)
                raise
            finally:
                os.close(local_fd)

            self._local_file = local_file
            self.read = self._local_file.read
            self.seek = self._local_file.seek

        return self._local_file.name
|