Mirror of https://github.com/timsutton/brigadier.git
Update downloader.py

parent b76ba74827
commit d3880df0f1

1 changed file with 214 additions and 59 deletions
downloader.py (273 changed lines: 214 additions, 59 deletions)
@@ -1,12 +1,145 @@
-import sys, os, time, ssl, gzip
+import sys, os, time, ssl, gzip, multiprocessing
 from io import BytesIO
 
 # Python-aware urllib stuff
-if sys.version_info >= (3, 0):
+try:
     from urllib.request import urlopen, Request
-else:
+    import queue as q
+except ImportError:
     # Import urllib2 to catch errors
     import urllib2
     from urllib2 import urlopen, Request
+    import Queue as q
+
+TERMINAL_WIDTH = 120 if os.name=="nt" else 80
+
+def get_size(size, suffix=None, use_1024=False, round_to=2, strip_zeroes=False):
+    # size is the number of bytes
+    # suffix is the target suffix to locate (B, KB, MB, etc) - if found
+    # use_1024 denotes whether or not we display in MiB vs MB
+    # round_to is the number of decimal points to round our result to (0-15)
+    # strip_zeroes denotes whether we strip out zeroes
+
+    # Failsafe in case our size is unknown
+    if size == -1:
+        return "Unknown"
+    # Get our suffixes based on use_1024
+    ext = ["B","KiB","MiB","GiB","TiB","PiB"] if use_1024 else ["B","KB","MB","GB","TB","PB"]
+    div = 1024 if use_1024 else 1000
+    s = float(size)
+    s_dict = {} # Initialize our dict
+    # Iterate the ext list, and divide by 1000 or 1024 each time to setup the dict {ext:val}
+    for e in ext:
+        s_dict[e] = s
+        s /= div
+    # Get our suffix if provided - will be set to None if not found, or if started as None
+    suffix = next((x for x in ext if x.lower() == suffix.lower()),None) if suffix else suffix
+    # Get the largest value that's still over 1
+    biggest = suffix if suffix else next((x for x in ext[::-1] if s_dict[x] >= 1), "B")
+    # Determine our rounding approach - first make sure it's an int; default to 2 on error
+    try:round_to=int(round_to)
+    except:round_to=2
+    round_to = 0 if round_to < 0 else 15 if round_to > 15 else round_to # Ensure it's between 0 and 15
+    bval = round(s_dict[biggest], round_to)
+    # Split our number based on decimal points
+    a,b = str(bval).split(".")
+    # Check if we need to strip or pad zeroes
+    b = b.rstrip("0") if strip_zeroes else b.ljust(round_to,"0") if round_to > 0 else ""
+    return "{:,}{} {}".format(int(a),"" if not b else "."+b,biggest)
+
+def _process_hook(queue, total_size, update_interval=1.0, max_packets=0):
+    bytes_so_far = 0
+    packets = []
+    speed = remaining = ""
+    last_update = time.time()
+    while True:
+        # Write our info first so we have *some* status while
+        # waiting for packets
+        if total_size > 0:
+            percent = float(bytes_so_far) / total_size
+            percent = round(percent*100, 2)
+            t_s = get_size(total_size)
+            try:
+                b_s = get_size(bytes_so_far, t_s.split(" ")[1])
+            except:
+                b_s = get_size(bytes_so_far)
+            perc_str = " {:.2f}%".format(percent)
+            bar_width = (TERMINAL_WIDTH // 3)-len(perc_str)
+            progress = "=" * int(bar_width * (percent/100))
+            sys.stdout.write("\r\033[K{}/{} | {}{}{}{}{}".format(
+                b_s,
+                t_s,
+                progress,
+                " " * (bar_width-len(progress)),
+                perc_str,
+                speed,
+                remaining
+            ))
+        else:
+            b_s = get_size(bytes_so_far)
+            sys.stdout.write("\r\033[K{}{}".format(b_s, speed))
+        sys.stdout.flush()
+        # Now we gather the next packet
+        try:
+            packet = queue.get(timeout=update_interval)
+            # Packets should be formatted as a tuple of
+            # (timestamp, len(bytes_downloaded))
+            # If "DONE" is passed, we assume the download
+            # finished - and bail
+            if packet == "DONE":
+                print("") # Jump to the next line
+                return
+            # Append our packet to the list and ensure we're not
+            # beyond our max.
+            # Only check max if it's > 0
+            packets.append(packet)
+            if max_packets > 0:
+                packets = packets[-max_packets:]
+            # Increment our bytes so far as well
+            bytes_so_far += packet[1]
+        except q.Empty:
+            # Didn't get anything - reset the speed
+            # and packets
+            packets = []
+            speed = " | 0 B/s"
+            remaining = " | ?? left" if total_size > 0 else ""
+        except KeyboardInterrupt:
+            print("") # Jump to the next line
+            return
+        # If we have packets and it's time for an update, process
+        # the info.
+        update_check = time.time()
+        if packets and update_check - last_update >= update_interval:
+            last_update = update_check # Refresh our update timestamp
+            speed = " | ?? B/s"
+            if len(packets) > 1:
+                # Let's calculate the amount downloaded over how long
+                try:
+                    first,last = packets[0][0],packets[-1][0]
+                    chunks = sum([float(x[1]) for x in packets])
+                    t = last-first
+                    assert t >= 0
+                    bytes_speed = 1. / t * chunks
+                    speed = " | {}/s".format(get_size(bytes_speed,round_to=1))
+                    # Get our remaining time
+                    if total_size > 0:
+                        seconds_left = (total_size-bytes_so_far) / bytes_speed
+                        days = seconds_left // 86400
+                        hours = (seconds_left - (days*86400)) // 3600
+                        mins = (seconds_left - (days*86400) - (hours*3600)) // 60
+                        secs = seconds_left - (days*86400) - (hours*3600) - (mins*60)
+                        if days > 99 or bytes_speed == 0:
+                            remaining = " | ?? left"
+                        else:
+                            remaining = " | {}{:02d}:{:02d}:{:02d} left".format(
+                                "{}:".format(int(days)) if days else "",
+                                int(hours),
+                                int(mins),
+                                int(round(secs))
+                            )
+                except:
+                    pass
+            # Clear the packets so we don't reuse the same ones
+            packets = []
+
 class Downloader:
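For reference, the new module-level get_size() is a pure formatting helper, so it is easy to sanity-check in isolation. A minimal sketch, with expected outputs worked out by hand from the logic above (the import assumes this file is importable as downloader.py):

    # Sanity checks for get_size() - expected values derived from the hunk above.
    from downloader import get_size  # assumes this file is on the path as downloader.py

    print(get_size(-1))                        # "Unknown" (failsafe for unknown sizes)
    print(get_size(123456789))                 # "123.46 MB" (decimal units by default)
    print(get_size(123456789, use_1024=True))  # "117.74 MiB" (binary units)
    print(get_size(123456789, suffix="KB"))    # "123,456.79 KB" (forced suffix)
    print(get_size(1500, round_to=0))          # "2 KB" (no decimal places)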
@@ -33,9 +166,38 @@ class Downloader:
             return value.decode(encoding,errors)
         return value
 
+    def _update_main_name(self):
+        # Windows running python 2 seems to have issues with multiprocessing
+        # if the case of the main script's name is incorrect:
+        # e.g. Downloader.py vs downloader.py
+        #
+        # To work around this, we try to scrape for the correct case if
+        # possible.
+        try:
+            path = os.path.abspath(sys.modules["__main__"].__file__)
+        except AttributeError as e:
+            # This likely means we're running from the interpreter
+            # directly
+            return None
+        if not os.path.isfile(path):
+            return None
+        # Get the file name and folder path
+        name = os.path.basename(path).lower()
+        fldr = os.path.dirname(path)
+        # Walk the files in the folder until we find our
+        # name - then steal its case and update that path
+        for f in os.listdir(fldr):
+            if f.lower() == name:
+                # Got it
+                new_path = os.path.join(fldr,f)
+                sys.modules["__main__"].__file__ = new_path
+                return new_path
+        # If we got here, it wasn't found
+        return None
+
     def open_url(self, url, headers = None):
         # Fall back on the default ua if none provided
-        headers = self.ua if headers == None else headers
+        headers = self.ua if headers is None else headers
         # Wrap up the try/except block so we don't have to do this for each function
         try:
             response = urlopen(Request(url, headers=headers), context=self.ssl_context)
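The hunk below wires _process_hook into get_bytes() and stream_to_file(). The contract between the download loop and the reporter process is only the queue: the producer puts (timestamp, chunk_length) tuples and ends with the "DONE" sentinel. A minimal sketch with a simulated download (the chunk sizes and sleeps are made up; the import again assumes downloader.py):

    # Simulated producer feeding the progress reporter process.
    import time, multiprocessing
    from downloader import _process_hook  # assumes this file is downloader.py

    if __name__ == "__main__":  # guard required for multiprocessing on Windows
        queue = multiprocessing.Queue()
        process = multiprocessing.Process(target=_process_hook, args=(queue, 1000000))
        process.daemon = True
        process.start()
        for _ in range(10):
            time.sleep(0.25)                  # pretend a chunk took a moment to arrive
            queue.put((time.time(), 100000))  # (timestamp, bytes in this chunk)
        queue.put("DONE")                     # tell the reporter to finish
        process.join()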
@@ -44,89 +206,82 @@ class Downloader:
             return None
         return response
 
-    def get_size(self, size, suffix=None, use_1024=False, round_to=2, strip_zeroes=False):
-        # size is the number of bytes
-        # suffix is the target suffix to locate (B, KB, MB, etc) - if found
-        # use_2014 denotes whether or not we display in MiB vs MB
-        # round_to is the number of dedimal points to round our result to (0-15)
-        # strip_zeroes denotes whether we strip out zeroes
-
-        # Failsafe in case our size is unknown
-        if size == -1:
-            return "Unknown"
-        # Get our suffixes based on use_1024
-        ext = ["B","KiB","MiB","GiB","TiB","PiB"] if use_1024 else ["B","KB","MB","GB","TB","PB"]
-        div = 1024 if use_1024 else 1000
-        s = float(size)
-        s_dict = {} # Initialize our dict
-        # Iterate the ext list, and divide by 1000 or 1024 each time to setup the dict {ext:val}
-        for e in ext:
-            s_dict[e] = s
-            s /= div
-        # Get our suffix if provided - will be set to None if not found, or if started as None
-        suffix = next((x for x in ext if x.lower() == suffix.lower()),None) if suffix else suffix
-        # Get the largest value that's still over 1
-        biggest = suffix if suffix else next((x for x in ext[::-1] if s_dict[x] >= 1), "B")
-        # Determine our rounding approach - first make sure it's an int; default to 2 on error
-        try:round_to=int(round_to)
-        except:round_to=2
-        round_to = 0 if round_to < 0 else 15 if round_to > 15 else round_to # Ensure it's between 0 and 15
-        bval = round(s_dict[biggest], round_to)
-        # Split our number based on decimal points
-        a,b = str(bval).split(".")
-        # Check if we need to strip or pad zeroes
-        b = b.rstrip("0") if strip_zeroes else b.ljust(round_to,"0") if round_to > 0 else ""
-        return "{:,}{} {}".format(int(a),"" if not b else "."+b,biggest)
-
-    def _progress_hook(self, bytes_so_far, total_size):
-        if total_size > 0:
-            percent = float(bytes_so_far) / total_size
-            percent = round(percent*100, 2)
-            t_s = self.get_size(total_size)
-            try: b_s = self.get_size(bytes_so_far, t_s.split(" ")[1])
-            except: b_s = self.get_size(bytes_so_far)
-            sys.stdout.write("\r\033[KDownloaded {} of {} ({:.2f}%)".format(b_s, t_s, percent))
-        else:
-            b_s = self.get_size(bytes_so_far)
-            sys.stdout.write("\r\033[KDownloaded {}".format(b_s))
+    def get_size(self, *args, **kwargs):
+        return get_size(*args,**kwargs)
 
     def get_string(self, url, progress = True, headers = None, expand_gzip = True):
         response = self.get_bytes(url,progress,headers,expand_gzip)
-        if response == None: return None
+        if response is None: return None
         return self._decode(response)
 
     def get_bytes(self, url, progress = True, headers = None, expand_gzip = True):
         response = self.open_url(url, headers)
-        if response == None: return None
-        bytes_so_far = 0
+        if response is None: return None
         try: total_size = int(response.headers['Content-Length'])
         except: total_size = -1
         chunk_so_far = b""
+        packets = queue = process = None
+        if progress:
+            # Make sure our vars are initialized
+            packets = [] if progress else None
+            queue = multiprocessing.Queue()
+            # Create the multiprocess and start it
+            process = multiprocessing.Process(target=_process_hook,args=(queue,total_size))
+            process.daemon = True
+            # Filthy hack for earlier python versions on Windows
+            if os.name == "nt" and hasattr(multiprocessing,"forking"):
+                self._update_main_name()
+            process.start()
         while True:
             chunk = response.read(self.chunk)
-            bytes_so_far += len(chunk)
-            if progress: self._progress_hook(bytes_so_far,total_size)
+            if progress:
+                # Add our items to the queue
+                queue.put((time.time(),len(chunk)))
             if not chunk: break
             chunk_so_far += chunk
         if expand_gzip and response.headers.get("Content-Encoding","unknown").lower() == "gzip":
             fileobj = BytesIO(chunk_so_far)
             gfile = gzip.GzipFile(fileobj=fileobj)
             return gfile.read()
-        if progress: print("") # Add a newline so our last progress prints completely
+        if progress:
+            # Finalize the queue and wait
+            queue.put("DONE")
+            process.join()
         return chunk_so_far
 
-    def stream_to_file(self, url, file_path, progress = True, headers = None):
+    def stream_to_file(self, url, file_path, progress = True, headers = None, ensure_size_if_present = True):
         response = self.open_url(url, headers)
-        if response == None: return None
+        if response is None: return None
         bytes_so_far = 0
         try: total_size = int(response.headers['Content-Length'])
         except: total_size = -1
+        packets = queue = process = None
+        if progress:
+            # Make sure our vars are initialized
+            packets = [] if progress else None
+            queue = multiprocessing.Queue()
+            # Create the multiprocess and start it
+            process = multiprocessing.Process(target=_process_hook,args=(queue,total_size))
+            process.daemon = True
+            # Filthy hack for earlier python versions on Windows
+            if os.name == "nt" and hasattr(multiprocessing,"forking"):
+                self._update_main_name()
+            process.start()
         with open(file_path, 'wb') as f:
             while True:
                 chunk = response.read(self.chunk)
                 bytes_so_far += len(chunk)
-                if progress: self._progress_hook(bytes_so_far,total_size)
+                if progress:
+                    # Add our items to the queue
+                    queue.put((time.time(),len(chunk)))
                 if not chunk: break
                 f.write(chunk)
-        if progress: print("") # Add a newline so our last progress prints completely
+        if progress:
+            # Finalize the queue and wait
+            queue.put("DONE")
+            process.join()
+        if ensure_size_if_present and total_size != -1:
+            # We're verifying size - make sure we got what we asked for
+            if bytes_so_far != total_size:
+                return None # We didn't - imply it failed
         return file_path if os.path.exists(file_path) else None
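From the caller's side, a download with the new progress reporting looks like the sketch below. Downloader's __init__ is not part of this diff, so the no-argument construction is an assumption, and the URL is a placeholder:

    # Hedged usage sketch - the URL and the no-arg constructor are assumptions.
    from downloader import Downloader

    if __name__ == "__main__":
        d = Downloader()
        path = d.stream_to_file(
            "https://example.com/BootCamp.pkg",  # placeholder URL
            "BootCamp.pkg",
            progress=True  # spawns _process_hook for the bar/speed/ETA display
        )
        # With the new ensure_size_if_present=True default, a short read
        # returns None rather than the path of a truncated file.
        print("Saved to {}".format(path) if path else "Download failed")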