From b811749e5b0710410e4f9603b0c8baac83b3465e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20El=20Amri?= Date: Wed, 17 Jul 2019 02:14:40 +0200 Subject: [PATCH 1/6] Attempt to fix Twitch extractors for new URLs Twitch changed its URLs for users' video lists recently. This commit includes fixes for "past broadcasts", "highlights" and "uploads". --- youtube_dl/extractor/twitch.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 0500e33a6..0ed0c1dbb 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -456,58 +456,49 @@ class TwitchAllVideosIE(TwitchVideosBaseIE): class TwitchUploadsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:uploads' - _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = r'%s\/?\?.*filter=uploads.*' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' _PLAYLIST_TYPE = 'uploads' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/uploads', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=uploads', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 0, - }, { - 'url': 'https://m.twitch.tv/spamfish/videos/uploads', - 'only_matching': True, }] class TwitchPastBroadcastsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:past-broadcasts' - _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = r'%s\/?\?.*filter=archives.*' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' _PLAYLIST_TYPE = 'past broadcasts' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 0, - }, { - 'url': 
'https://m.twitch.tv/spamfish/videos/past-broadcasts', - 'only_matching': True, }] class TwitchHighlightsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:highlights' - _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = r'%s\/?\?.*filter=highlights.*' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight' _PLAYLIST_TYPE = 'highlights' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/highlights', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 805, - }, { - 'url': 'https://m.twitch.tv/spamfish/videos/highlights', - 'only_matching': True, }] From cb7a08744685f5bac3a27da7d4d1921c370715fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20El=20Amri?= Date: Thu, 18 Jul 2019 14:52:24 +0200 Subject: [PATCH 2/6] [twitch.py] Removed obsolete IE --- youtube_dl/extractor/extractors.py | 2 -- youtube_dl/extractor/twitch.py | 37 ------------------------------ 2 files changed, 39 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 555fadfaf..a974eed55 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1231,8 +1231,6 @@ from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import TwitCastingIE from .twitch import ( - TwitchVideoIE, - TwitchChapterIE, TwitchVodIE, TwitchProfileIE, TwitchAllVideosIE, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 0ed0c1dbb..a8d1ce050 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -205,43 +205,6 @@ def _real_extract(self, url): return self._extract_media(self._match_id(url)) -class TwitchVideoIE(TwitchItemBaseIE): - IE_NAME = 'twitch:video' - _VALID_URL = r'%s/[^/]+/b/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE - _ITEM_TYPE = 'video' - _ITEM_SHORTCUT = 'a' - - _TEST = { - 'url': 
'http://www.twitch.tv/riotgames/b/577357806', - 'info_dict': { - 'id': 'a577357806', - 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG', - }, - 'playlist_mincount': 12, - 'skip': 'HTTP Error 404: Not Found', - } - - -class TwitchChapterIE(TwitchItemBaseIE): - IE_NAME = 'twitch:chapter' - _VALID_URL = r'%s/[^/]+/c/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE - _ITEM_TYPE = 'chapter' - _ITEM_SHORTCUT = 'c' - - _TESTS = [{ - 'url': 'http://www.twitch.tv/acracingleague/c/5285812', - 'info_dict': { - 'id': 'c5285812', - 'title': 'ACRL Off Season - Sports Cars @ Nordschleife', - }, - 'playlist_mincount': 3, - 'skip': 'HTTP Error 404: Not Found', - }, { - 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361', - 'only_matching': True, - }] - - class TwitchVodIE(TwitchItemBaseIE): IE_NAME = 'twitch:vod' _VALID_URL = r'''(?x) From 7679c2333241e050d4222e84c4bc173548fd01ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20El=20Amri?= Date: Thu, 18 Jul 2019 15:21:13 +0200 Subject: [PATCH 3/6] [twitch.py] Modified _VALID_URL of all playlist IEs Also fixed a bug because of removed TwitchVideoIE and TwitchChapterIE --- youtube_dl/extractor/twitch.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index a8d1ce050..be90669d8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -394,37 +394,34 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): class TwitchVideosBaseIE(TwitchPlaylistBaseIE): - _VALID_URL_VIDEOS_BASE = r'%s/(?P[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL_VIDEOS_BASE = r'%s/(?P[^/]+)/videos/?\?(?:.*?[&;])??filter=%%s' % TwitchBaseIE._VALID_URL_BASE _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type=' class TwitchAllVideosIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:all' - _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = 
TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'all' _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight' _PLAYLIST_TYPE = 'all videos' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos/all', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=time', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', }, 'playlist_mincount': 869, - }, { - 'url': 'https://m.twitch.tv/spamfish/videos/all', - 'only_matching': True, }] class TwitchUploadsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:uploads' - _VALID_URL = r'%s\/?\?.*filter=uploads.*' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'uploads' _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' _PLAYLIST_TYPE = 'uploads' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos?filter=uploads', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=uploads&sort=time', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', @@ -435,12 +432,12 @@ class TwitchUploadsIE(TwitchVideosBaseIE): class TwitchPastBroadcastsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:past-broadcasts' - _VALID_URL = r'%s\/?\?.*filter=archives.*' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'archives' _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' _PLAYLIST_TYPE = 'past broadcasts' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives&sort=time', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', @@ -451,12 +448,12 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE): class TwitchHighlightsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:highlights' - _VALID_URL = r'%s\/?\?.*filter=highlights.*' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE + _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'highlights' _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 
'highlight' _PLAYLIST_TYPE = 'highlights' _TESTS = [{ - 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights', + 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights&sort=views', 'info_dict': { 'id': 'spamfish', 'title': 'Spamfish', @@ -513,8 +510,6 @@ class TwitchStreamIE(TwitchBaseIE): def suitable(cls, url): return (False if any(ie.suitable(url) for ie in ( - TwitchVideoIE, - TwitchChapterIE, TwitchVodIE, TwitchProfileIE, TwitchAllVideosIE, From a7d44de6da0a44e2034ecd1e997a738e94fe70e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20El=20Amri?= Date: Sat, 20 Jul 2019 20:02:48 +0200 Subject: [PATCH 4/6] Reverting the removal of TwitchVideoIE and TwitchChapterIE --- youtube_dl/extractor/extractors.py | 2 ++ youtube_dl/extractor/twitch.py | 39 ++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a974eed55..555fadfaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1231,6 +1231,8 @@ from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import TwitCastingIE from .twitch import ( + TwitchVideoIE, + TwitchChapterIE, TwitchVodIE, TwitchProfileIE, TwitchAllVideosIE, diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index be90669d8..bc8f32936 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -205,6 +205,43 @@ def _real_extract(self, url): return self._extract_media(self._match_id(url)) +class TwitchVideoIE(TwitchItemBaseIE): + IE_NAME = 'twitch:video' + _VALID_URL = r'%s/[^/]+/b/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE + _ITEM_TYPE = 'video' + _ITEM_SHORTCUT = 'a' + + _TEST = { + 'url': 'http://www.twitch.tv/riotgames/b/577357806', + 'info_dict': { + 'id': 'a577357806', + 'title': 'Worlds Semifinals - Star Horn Royal Club vs. 
OMG', + }, + 'playlist_mincount': 12, + 'skip': 'HTTP Error 404: Not Found', + } + + +class TwitchChapterIE(TwitchItemBaseIE): + IE_NAME = 'twitch:chapter' + _VALID_URL = r'%s/[^/]+/c/(?P\d+)' % TwitchBaseIE._VALID_URL_BASE + _ITEM_TYPE = 'chapter' + _ITEM_SHORTCUT = 'c' + + _TESTS = [{ + 'url': 'http://www.twitch.tv/acracingleague/c/5285812', + 'info_dict': { + 'id': 'c5285812', + 'title': 'ACRL Off Season - Sports Cars @ Nordschleife', + }, + 'playlist_mincount': 3, + 'skip': 'HTTP Error 404: Not Found', + }, { + 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361', + 'only_matching': True, + }] + + class TwitchVodIE(TwitchItemBaseIE): IE_NAME = 'twitch:vod' _VALID_URL = r'''(?x) @@ -510,6 +547,8 @@ class TwitchStreamIE(TwitchBaseIE): def suitable(cls, url): return (False if any(ie.suitable(url) for ie in ( + TwitchVideoIE, + TwitchChapterIE, TwitchVodIE, TwitchProfileIE, TwitchAllVideosIE, From ca7e9e1cd1704b81562962f135464dbaefd22699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20El=20Amri?= Date: Sat, 20 Jul 2019 20:35:58 +0200 Subject: [PATCH 5/6] [twitch.py] Improved _VALID_URL --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/twitch.py | 26 +++++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 555fadfaf..2b26cb347 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1235,10 +1235,10 @@ TwitchChapterIE, TwitchVodIE, TwitchProfileIE, - TwitchAllVideosIE, TwitchUploadsIE, TwitchPastBroadcastsIE, TwitchHighlightsIE, + TwitchAllVideosIE, TwitchStreamIE, TwitchClipsIE, ) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index bc8f32936..6a1015ce5 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -431,13 +431,17 @@ class TwitchProfileIE(TwitchPlaylistBaseIE): class TwitchVideosBaseIE(TwitchPlaylistBaseIE): - _VALID_URL_VIDEOS_BASE 
= r'%s/(?P[^/]+)/videos/?\?(?:.*?[&;])??filter=%%s' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL_VIDEOS_BASE = r'%s/(?P[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE + _VALID_URL_VIDEOS_FILTERS = r'\?(?:.*?[&;])??filter=%s' _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type=' class TwitchAllVideosIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:all' - _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'all' + _VALID_URL = '%s(?:/?(?:%s)|[^/?]+?/?)?' % ( + TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE, + TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'all' + ) _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight' _PLAYLIST_TYPE = 'all videos' @@ -448,12 +452,18 @@ class TwitchAllVideosIE(TwitchVideosBaseIE): 'title': 'Spamfish', }, 'playlist_mincount': 869, + }, { + 'url': 'https://m.twitch.tv/spamfish/videos/', + 'only_matching': True, }] class TwitchUploadsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:uploads' - _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'uploads' + _VALID_URL = '%s/?(?:%s)' % ( + TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE, + TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'uploads' + ) _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload' _PLAYLIST_TYPE = 'uploads' @@ -469,7 +479,10 @@ class TwitchUploadsIE(TwitchVideosBaseIE): class TwitchPastBroadcastsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:past-broadcasts' - _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'archives' + _VALID_URL = '%s/?(?:%s)' % ( + TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE, + TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'archives' + ) _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive' _PLAYLIST_TYPE = 'past broadcasts' @@ -485,7 +498,10 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE): class TwitchHighlightsIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:highlights' - _VALID_URL = TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE % 'highlights' + _VALID_URL = '%s/?(?:%s)' % ( + 
TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE, + TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'highlights' + ) _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight' _PLAYLIST_TYPE = 'highlights' From d28026d934453e423344dfd363a9909c520ef0b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20El=20Amri?= Date: Mon, 29 Jul 2019 19:03:19 +0200 Subject: [PATCH 6/6] [twitch] Modified _VALID_URL for TwitchAllVideosIE Reverted the modification of youtube_dl/extractor/extractors.py since TwitchAllVideosIE now only matches on URLs with filter=all or without any parameters --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/twitch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2b26cb347..555fadfaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1235,10 +1235,10 @@ TwitchChapterIE, TwitchVodIE, TwitchProfileIE, + TwitchAllVideosIE, TwitchUploadsIE, TwitchPastBroadcastsIE, TwitchHighlightsIE, - TwitchAllVideosIE, TwitchStreamIE, TwitchClipsIE, ) diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 6a1015ce5..ef823220c 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -438,7 +438,7 @@ class TwitchVideosBaseIE(TwitchPlaylistBaseIE): class TwitchAllVideosIE(TwitchVideosBaseIE): IE_NAME = 'twitch:videos:all' - _VALID_URL = '%s(?:/?(?:%s)|[^/?]+?/?)?' % ( + _VALID_URL = '%s/?(?:(?:%s)|$)' % ( TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE, TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'all' )