From be64b5b098e3563d563bcf091f6f74edf22d7764 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 25 Nov 2014 09:54:54 +0100 Subject: [PATCH] [xminus] Simplify and extend (#4302) --- test/test_utils.py | 10 +++++++ youtube_dl/extractor/xminus.py | 54 +++++++++++++++++++++------------- youtube_dl/utils.py | 51 ++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 20 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 8307599b3..9dc879e0d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -47,6 +47,7 @@ js_to_json, intlist_to_bytes, args_to_str, + parse_filesize, ) @@ -367,5 +368,14 @@ def test_args_to_str(self): 'foo ba/r -baz \'2 be\' \'\'' ) + def test_parse_filesize(self): + self.assertEqual(parse_filesize(None), None) + self.assertEqual(parse_filesize(''), None) + self.assertEqual(parse_filesize('91 B'), 91) + self.assertEqual(parse_filesize('foobar'), None) + self.assertEqual(parse_filesize('2 MiB'), 2097152) + self.assertEqual(parse_filesize('5 GB'), 5000000000) + self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py index c92c8451a..a3fd8f48c 100644 --- a/youtube_dl/extractor/xminus.py +++ b/youtube_dl/extractor/xminus.py @@ -2,7 +2,14 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..compat import ( + compat_chr, + compat_ord, +) +from ..utils import ( + int_or_none, + parse_filesize, +) class XMinusIE(InfoExtractor): @@ -15,39 +22,46 @@ class XMinusIE(InfoExtractor): 'ext': 'mp3', 'title': 'Леонид Агутин-Песенка шофера', 'duration': 156, + 'tbr': 320, + 'filesize_approx': 5900000, + 'view_count': int, } } def _real_extract(self, url): video_id = self._match_id(url) - - # TODO more code goes here, for example ... webpage = self._download_webpage(url, video_id) + artist = self._html_search_regex( - r'minus_track.artist="(.+?)"', webpage, 'artist') + r'minus_track\.artist="(.+?)"', webpage, 'artist') title = artist + '-' + self._html_search_regex( - r'minus_track.title="(.+?)"', webpage, 'title') + r'minus_track\.title="(.+?)"', webpage, 'title') duration = int_or_none(self._html_search_regex( - r'minus_track.dur_sec=\'([0-9]+?)\'', webpage, 'duration')) + r'minus_track\.dur_sec=\'([0-9]*?)\'', + webpage, 'duration', fatal=False)) + filesize_approx = parse_filesize(self._html_search_regex( + r'
\s*([0-9.]+\s*[a-zA-Z][bB])', + webpage, 'approximate filesize', fatal=False)) + tbr = int_or_none(self._html_search_regex( + r'
\s*([0-9]+)\s*kbps', + webpage, 'bitrate', fatal=False)) + view_count = int_or_none(self._html_search_regex( + r'
[0-9]+(?:\.[0-9]*)?)\s*(?P%s)' % units_re, s) + if not m: + return None + + return int(float(m.group('num')) * _UNIT_TABLE[m.group('unit')]) + + def get_term_width(): columns = compat_getenv('COLUMNS', None) if columns: