[youtube] Move more tests to extractors

This commit is contained in:
Philipp Hagemeister 2014-09-24 10:25:47 +02:00
parent 69ea8ca42c
commit cdc628a498
2 changed files with 62 additions and 48 deletions

View File

@ -10,7 +10,6 @@
from youtube_dl.extractor import ( from youtube_dl.extractor import (
YoutubeUserIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubeIE, YoutubeIE,
YoutubeChannelIE, YoutubeChannelIE,
@ -43,28 +42,6 @@ def test_youtube_course(self):
self.assertEqual(len(entries), 25) self.assertEqual(len(entries), 25)
self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0')
def test_youtube_channel(self):
dl = FakeYDL()
ie = YoutubeChannelIE(dl)
#test paginated channel
result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel
result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self):
dl = FakeYDL()
ie = YoutubeUserIE(dl)
result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320)
def test_youtube_show(self):
dl = FakeYDL()
ie = YoutubeShowIE(dl)
result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3)
def test_youtube_mix(self): def test_youtube_mix(self):
dl = FakeYDL() dl = FakeYDL()
ie = YoutubePlaylistIE(dl) ie = YoutubePlaylistIE(dl)
@ -83,21 +60,5 @@ def test_youtube_toptracks(self):
entries = result['entries'] entries = result['entries']
self.assertEqual(len(entries), 100) self.assertEqual(len(entries), 100)
def test_youtube_toplist(self):
dl = FakeYDL()
ie = YoutubeTopListIE(dl)
result = ie.extract('yttoplist:music:Trending')
entries = result['entries']
self.assertTrue(len(entries) >= 5)
def test_youtube_search_url(self):
dl = FakeYDL()
ie = YoutubeSearchURLIE(dl)
result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
entries = result['entries']
self.assertIsPlaylist(result)
self.assertEqual(result['title'], 'youtube-dl test video')
self.assertTrue(len(entries) >= 5)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1160,16 +1160,25 @@ class YoutubeTopListIE(YoutubePlaylistIE):
IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'
' (Example: "yttoplist:music:Top Tracks")') ' (Example: "yttoplist:music:Top Tracks")')
_VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
_TESTS = [] _TESTS = [{
'url': 'yttoplist:music:Trending',
'playlist_mincount': 5,
'skip': 'Only works for logged-in users',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
channel = mobj.group('chann') channel = mobj.group('chann')
title = mobj.group('title') title = mobj.group('title')
query = compat_urllib_parse.urlencode({'title': title}) query = compat_urllib_parse.urlencode({'title': title})
playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) channel_page = self._download_webpage(
channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) 'https://www.youtube.com/%s' % channel, title)
link = self._html_search_regex(playlist_re, channel_page, 'list') link = self._html_search_regex(
r'''(?x)
<a\s+href="([^"]+)".*?>\s*
<span\s+class="branded-page-module-title-text">\s*
<span[^>]*>.*?%s.*?</span>''' % re.escape(query),
channel_page, 'list')
url = compat_urlparse.urljoin('https://www.youtube.com/', link) url = compat_urlparse.urljoin('https://www.youtube.com/', link)
video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
@ -1195,6 +1204,11 @@ class YoutubeChannelIE(InfoExtractor):
_MORE_PAGES_INDICATOR = 'yt-uix-load-more' _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
IE_NAME = 'youtube:channel' IE_NAME = 'youtube:channel'
_TESTS = [{
'note': 'paginated channel',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'playlist_mincount': 91,
}]
def extract_videos_from_page(self, page): def extract_videos_from_page(self, page):
ids_in_page = [] ids_in_page = []
@ -1253,6 +1267,17 @@ class YoutubeUserIE(InfoExtractor):
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
IE_NAME = 'youtube:user' IE_NAME = 'youtube:user'
_TESTS = [{
'url': 'https://www.youtube.com/user/TheLinuxFoundation',
'playlist_mincount': 320,
'info_dict': {
'title': 'TheLinuxFoundation',
}
}, {
'url': 'ytuser:phihag',
'only_matching': True,
}]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
# Don't return True if the url can be extracted with other youtube # Don't return True if the url can be extracted with other youtube
@ -1361,6 +1386,13 @@ class YoutubeSearchURLIE(InfoExtractor):
IE_DESC = 'YouTube.com search URLs' IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url' IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
'info_dict': {
'title': 'youtube-dl test video',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -1395,17 +1427,38 @@ def _real_extract(self, url):
class YoutubeShowIE(InfoExtractor): class YoutubeShowIE(InfoExtractor):
IE_DESC = 'YouTube.com (multi-season) shows' IE_DESC = 'YouTube.com (multi-season) shows'
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)' _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'
IE_NAME = 'youtube:show' IE_NAME = 'youtube:show'
_TESTS = [{
'url': 'http://www.youtube.com/show/airdisasters',
'playlist_mincount': 3,
'info_dict': {
'id': 'airdisasters',
'title': 'Air Disasters',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
show_name = mobj.group(1) playlist_id = mobj.group('id')
webpage = self._download_webpage(url, show_name, 'Downloading show webpage') webpage = self._download_webpage(
url, playlist_id, 'Downloading show webpage')
# There's one playlist for each season of the show # There's one playlist for each season of the show
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen('%s: Found %s seasons' % (show_name, len(m_seasons))) self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons)))
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] entries = [
self.url_result(
'https://www.youtube.com' + season.group(1), 'YoutubePlaylist')
for season in m_seasons
]
title = self._og_search_title(webpage, fatal=False)
return {
'_type': 'playlist',
'id': playlist_id,
'title': title,
'entries': entries,
}
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):