From 5cda4eda7253d766611363a880af46895c11ad17 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 17 Feb 2015 21:37:48 +0100 Subject: [PATCH] [YoutubeDL] Use a progress hook for progress reporting Instead of every downloader calling two helper functions, let our progress report be an ordinary progress hook like everyone else's. Closes #4875. --- youtube_dl/YoutubeDL.py | 13 +++-- youtube_dl/downloader/common.py | 87 +++++++++++++++++++++------------ youtube_dl/downloader/f4m.py | 70 ++++++++++++++++---------- youtube_dl/downloader/http.py | 13 +++-- youtube_dl/downloader/rtmp.py | 18 +++---- 5 files changed, 128 insertions(+), 73 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3658332ecd..b087d356fc 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -199,18 +199,25 @@ class YoutubeDL(object): postprocessor. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries - * status: One of "downloading" and "finished". + * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. - If status is one of "downloading" or "finished", the + If status is one of "downloading" or "finished", the following properties may also be present: * filename: The final filename (always present) + * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown - * tmpfilename: The filename we're currently writing to + * total_bytes_estimate: Guess of the eventual file size, + None if unavailable. + * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown + * fragment_index: The counter of the currently + downloaded video fragment.
+ * fragment_count: The number of fragments (= individual + files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 7bb3a948d2..45e55b99c6 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import os import re @@ -54,6 +54,7 @@ def __init__(self, ydl, params): self.ydl = ydl self._progress_hooks = [] self.params = params + self.add_progress_hook(self.report_progress) @staticmethod def format_seconds(seconds): @@ -226,42 +227,64 @@ def _report_progress_status(self, msg, is_last_line=False): self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) self.to_console_title('youtube-dl ' + msg) - def report_progress(self, percent, data_len_str, speed, eta): - """Report download progress.""" - if self.params.get('noprogress', False): + def report_progress(self, s): + if s['status'] == 'finished': + if self.params.get('noprogress', False): + self.to_screen('[download] Download completed') + else: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' + else: + msg_template = '100%% of %(_total_bytes_str)s' + self._report_progress_status( + msg_template % s, is_last_line=True) + + if self.params.get('noprogress'): return - if eta is not None: - eta_str = self.format_eta(eta) - else: - eta_str = 'Unknown ETA' - if percent is not None: - percent_str = self.format_percent(percent) - else: - percent_str = 'Unknown %' - speed_str = self.format_speed(speed) - msg = ('%s of %s at %s ETA %s' % - (percent_str, data_len_str, speed_str, eta_str)) - self._report_progress_status(msg) - - def
report_progress_live_stream(self, downloaded_data_len, speed, elapsed): - if self.params.get('noprogress', False): + if s['status'] != 'downloading': return - downloaded_str = format_bytes(downloaded_data_len) - speed_str = self.format_speed(speed) - elapsed_str = FileDownloader.format_seconds(elapsed) - msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) - self._report_progress_status(msg) - def report_finish(self, data_len_str, tot_time): - """Report download finished.""" - if self.params.get('noprogress', False): - self.to_screen('[download] Download completed') + if s.get('eta') is not None: + s['_eta_str'] = self.format_eta(s['eta']) else: - self._report_progress_status( - ('100%% of %s in %s' % - (data_len_str, self.format_seconds(tot_time))), - is_last_line=True) + s['_eta_str'] = 'Unknown ETA' + + if s.get('total_bytes') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) + elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) + else: + if s.get('downloaded_bytes') == 0: + s['_percent_str'] = self.format_percent(0) + else: + s['_percent_str'] = 'Unknown %' + + if s.get('speed') is not None: + s['_speed_str'] = self.format_speed(s['speed']) + else: + s['_speed_str'] = 'Unknown speed' + + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' + elif s.get('total_bytes_estimate') is not None: + s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) + msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' + else: + if s.get('downloaded_bytes') is not None: + s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) + if s.get('elapsed'): +
s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' + else: + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' + else: + msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s' + + self._report_progress_status(msg_template % s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 0e7a1c2007..5a1f8e6800 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import division, unicode_literals import base64 import io @@ -252,17 +252,6 @@ def real_download(self, filename, info_dict): requested_bitrate = info_dict.get('tbr') self.to_screen('[download] Downloading f4m manifest') manifest = self.ydl.urlopen(man_url).read() - self.report_destination(filename) - http_dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': True, - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit', None), - 'test': self.params.get('test', False), - } - ) doc = etree.fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) @@ -298,39 +287,67 @@ def real_download(self, filename, info_dict): # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + self.report_destination(filename) + http_dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), + 'test': self.params.get('test', False), + } + ) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + write_flv_header(dest_stream) write_metadata_tag(dest_stream, metadata) # This dict stores the download progress, it's updated by the progress # hook state = { + 'status':
'downloading', 'downloaded_bytes': 0, - 'frag_counter': 0, + 'fragment_index': 0, + 'fragment_count': total_frags, + 'filename': filename, + 'tmpfilename': tmpfilename, } start = time.time() - def frag_progress_hook(status): - frag_total_bytes = status.get('total_bytes', 0) - estimated_size = (state['downloaded_bytes'] + - (total_frags - state['frag_counter']) * frag_total_bytes) - if status['status'] == 'finished': + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + frag_total_bytes = s.get('total_bytes', 0) + if s['status'] == 'finished': state['downloaded_bytes'] += frag_total_bytes - state['frag_counter'] += 1 - progress = self.calc_percent(state['frag_counter'], total_frags) + state['fragment_index'] += 1 + + estimated_size = ( + (state['downloaded_bytes'] + frag_total_bytes) + / (state['fragment_index'] + 1) * total_frags) + time_now = time.time() + state['total_bytes_estimate'] = estimated_size + state['elapsed'] = time_now - start + + if s['status'] == 'finished': + progress = self.calc_percent(state['fragment_index'], total_frags) byte_counter = state['downloaded_bytes'] else: - frag_downloaded_bytes = status['downloaded_bytes'] + frag_downloaded_bytes = s['downloaded_bytes'] byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes frag_progress = self.calc_percent(frag_downloaded_bytes, frag_total_bytes) - progress = self.calc_percent(state['frag_counter'], total_frags) + progress = self.calc_percent(state['fragment_index'], total_frags) progress += frag_progress / float(total_frags) - eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) - self.report_progress(progress, format_bytes(estimated_size), - status.get('speed'), eta) + state['eta'] = self.calc_eta( + start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) + state['speed'] = s.get('speed') + self._hook_progress(state) + http_dl.add_progress_hook(frag_progress_hook) frags_filenames = [] @@ -354,8 +371,8 @@ def
frag_progress_hook(status): frags_filenames.append(frag_filename) dest_stream.close() - self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) + elapsed = time.time() - start self.try_rename(tmpfilename, filename) for frag_file in frags_filenames: os.remove(frag_file) @@ -366,6 +383,7 @@ def frag_progress_hook(status): 'total_bytes': fsize, 'filename': filename, 'status': 'finished', + 'elapsed': elapsed, }) return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 49170cf9d4..d37522aeae 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -200,16 +200,16 @@ def real_download(self, filename, info_dict): else: percent = self.calc_percent(byte_counter, data_len) eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) - self.report_progress(percent, data_len_str, speed, eta) self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': byte_counter, 'total_bytes': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, 'speed': speed, + 'elapsed': now - start, }) if is_test and byte_counter == data_len: @@ -221,7 +221,13 @@ def real_download(self, filename, info_dict): return False if tmpfilename != '-': stream.close() - self.report_finish(data_len_str, (time.time() - start)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': data_len, + 'tmpfilename': tmpfilename, + 'status': 'error', + }) if data_len is not None and byte_counter != data_len: raise ContentTooShortError(byte_counter, int(data_len)) self.try_rename(tmpfilename, filename) @@ -235,6 +241,7 @@ def real_download(self, filename, info_dict): 'total_bytes': byte_counter, 'filename': filename, 'status': 'finished', + 'elapsed': time.time() - start, }) return True diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index f7eeb6f43f..79836fe99a 100644 --- a/youtube_dl/downloader/rtmp.py +++ 
b/youtube_dl/downloader/rtmp.py @@ -51,23 +51,23 @@ def run_rtmpdump(args): if not resume_percent: resume_percent = percent resume_downloaded_data_len = downloaded_data_len - eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) - speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) data_len = None if percent > 0: data_len = int(downloaded_data_len * 100 / percent) - data_len_str = '~' + format_bytes(data_len) - self.report_progress(percent, data_len_str, speed, eta) - cursor_in_new_line = False self._hook_progress({ + 'status': 'downloading', 'downloaded_bytes': downloaded_data_len, - 'total_bytes': data_len, + 'total_bytes_estimate': data_len, 'tmpfilename': tmpfilename, 'filename': filename, - 'status': 'downloading', 'eta': eta, + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False else: # no percent for live streams mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) @@ -75,15 +75,15 @@ def run_rtmpdump(args): downloaded_data_len = int(float(mobj.group(1)) * 1024) time_now = time.time() speed = self.calc_speed(start, time_now, downloaded_data_len) - self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) - cursor_in_new_line = False self._hook_progress({ 'downloaded_bytes': downloaded_data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', + 'elapsed': time_now - start, 'speed': speed, }) + cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('')