# pylint: skip-file
import os
import time
import tempfile
import threading
import urllib.parse

import pycurl
from io import BytesIO
from tenacity import retry, wait_random_exponential, stop_after_attempt


class URLFile(object):
    """Minimal read-only, seekable file-like view of a remote URL.

    Data is fetched with HTTP ``Range`` requests through a pycurl handle
    that is shared by every ``URLFile`` created on the same thread (it is
    stored in a ``threading.local``).  Because the handle is shared, every
    option a request depends on is set explicitly on each ``read()``.

    Accessing the ``name`` property downloads the whole resource into a
    temporary file; from then on ``read``/``seek`` operate on that local
    copy.  Use the object as a context manager to have the temporary file
    cleaned up.
    """

    # One curl handle per thread, reused across instances.
    _tlocal = threading.local()

    def __init__(self, url, debug=False):
        """Create a view of *url*; *debug* enables verbose transfer logging."""
        self._url = url
        self._pos = 0
        self._local_file = None
        self._debug = debug

        try:
            self._curl = self._tlocal.curl
        except AttributeError:
            # First URLFile on this thread: create the shared handle.
            self._curl = self._tlocal.curl = pycurl.Curl()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close and delete the temporary local copy, if one was created.

        After this the object talks to the remote URL again; call
        ``seek()`` before reading to choose the offset.
        """
        local_file = self._local_file
        if local_file is None:
            return
        self._local_file = None
        # ``name`` shadowed read/seek on the instance with the local file's
        # bound methods; drop them so the class methods are visible again
        # instead of pointing at a closed file.
        self.__dict__.pop("read", None)
        self.__dict__.pop("seek", None)
        try:
            # Close before removing: an open file cannot be deleted on
            # every platform.
            local_file.close()
        finally:
            os.remove(local_file.name)

    @retry(wait=wait_random_exponential(multiplier=1, max=5), stop=stop_after_attempt(3), reraise=True)
    def read(self, ll=None):
        """Read bytes starting at the current position.

        Args:
            ll: number of bytes to read.  ``None`` or a negative value
                reads to the end of the resource; ``0`` returns ``b""``
                without issuing a request.

        Returns:
            The bytes received.  ``b""`` when the position is at or past
            the end of the resource (HTTP 416).

        Raises:
            Exception: if the server answers with a status other than
                200, 206 or 416.
            pycurl.error: if the transfer itself fails.
            (Both are retried up to three attempts before being re-raised.)
        """
        if ll is not None and ll == 0:
            # "bytes=N-(N-1)" is not a valid range; servers typically
            # ignore it and send the whole body.
            return b""

        if ll is None or ll < 0:
            trange = 'bytes=%d-' % self._pos
        else:
            trange = 'bytes=%d-%d' % (self._pos, self._pos + ll - 1)

        debug = self._debug

        def header(x):
            if debug and b'MISS' in x:
                print(x.strip())

        def test(debug_type, debug_msg):
            print(" debug(%d): %s" % (debug_type, debug_msg.strip()))

        dats = BytesIO()
        c = self._curl
        c.setopt(pycurl.URL, self._url)
        c.setopt(pycurl.WRITEDATA, dats)
        c.setopt(pycurl.NOSIGNAL, 1)
        c.setopt(pycurl.TIMEOUT_MS, 500000)
        c.setopt(pycurl.HTTPHEADER, ["Range: " + trange, "Connection: keep-alive"])
        c.setopt(pycurl.FOLLOWLOCATION, True)

        # The handle is shared per thread, so debug-related options are
        # (re)set on every request; otherwise one debug instance would
        # leave verbose output enabled for all later instances.
        c.setopt(pycurl.HEADERFUNCTION, header)
        c.setopt(pycurl.DEBUGFUNCTION, test)
        c.setopt(pycurl.VERBOSE, 1 if debug else 0)

        if debug:
            print("downloading", self._url)

        t1 = time.time()
        c.perform()

        if debug:
            t2 = time.time()
            if t2 - t1 > 0.1:
                print("get %s %r %.f slow" % (self._url, trange, t2 - t1))

        response_code = c.getinfo(pycurl.RESPONSE_CODE)
        if response_code == 416:  # Requested Range Not Satisfiable
            return b""
        if response_code != 206 and response_code != 200:
            raise Exception("Error {} ({}): {}".format(response_code, self._url, repr(dats.getvalue())[:500]))

        ret = dats.getvalue()
        self._pos += len(ret)
        return ret

    def seek(self, pos):
        """Set the absolute byte offset used by the next ``read()``."""
        self._pos = pos

    @property
    def name(self):
        """Returns a local path to file with the URLFile's contents.

        This can be used to interface with modules that require local files.

        The first access downloads the complete resource (from offset 0)
        into a temporary file and redirects ``read``/``seek`` to it, keeping
        the current read position.  The file is deleted by ``__exit__``.
        """
        if self._local_file is None:
            _, ext = os.path.splitext(urllib.parse.urlparse(self._url).path)
            start_pos = self._pos
            local_fd, local_path = tempfile.mkstemp(suffix=ext)
            local_file = None
            try:
                # A buffered file object writes the whole payload; a single
                # os.write() call may write only part of it.
                with os.fdopen(local_fd, "wb") as tmp:
                    # Always fetch the entire resource, regardless of how
                    # much has been read so far.
                    self._pos = 0
                    tmp.write(self.read())
                local_file = open(local_path, "rb")
                # Keep the caller's logical position in the local copy.
                local_file.seek(start_pos)
            except Exception:
                if local_file is not None:
                    local_file.close()
                self._pos = start_pos
                os.remove(local_path)
                raise

            self._local_file = local_file
            # Shadow the network-backed methods with the local file's.
            self.read = self._local_file.read
            self.seek = self._local_file.seek

        return self._local_file.name