twitch.tv chapters: Use API for title and other metadata

2024-11-23 09:01:43 +00:00 · 2013-05-04 11:42:44 +02:00 · 2013-05-04 11:42:44 +02:00 · db8fd71ca9
commit db8fd71ca9
parent f4f316881d
1 changed file with 9 additions and 5 deletions
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3368,10 +3368,6 @@ def _real_extract(self, url):
            if not m:
                raise ExtractorError(u'Cannot find archive of a chapter')
            archive_id = m.group(1)
-            m = re.search(r"<h2 class='js-title'>([^<]*)</h2>", webpage)
-            if not m:
-                raise ExtractorError(u'Cannot find chapter title')
-            video_title = m.group(1)

            api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
            chapter_info_xml = self._download_webpage(api, chapter_id,
@@ -3387,6 +3383,12 @@ def _real_extract(self, url):
            video_url = a.find('./video_file_url').text
            video_ext = video_url.rpartition('.')[2] or u'flv'

+            chapter_api_url = u'https://api.twitch.tv/kraken/videos/c' + chapter_id
+            chapter_info_json = self._download_webpage(chapter_api_url, video_id,
+                                   note='Downloading chapter metadata',
+                                   errnote='Download of chapter metadata failed')
+            chapter_info = json.loads(chapter_info_json)
+
            # TODO determine start (and probably fix up file)
            #  youtube-dl -v http://www.twitch.tv/firmbelief/c/1757457
            #video_url += u'?start=' + a.find('./start_timestamp').text
@@ -3396,7 +3398,9 @@ def _real_extract(self, url):
                'id': u'c' + chapter_id,
                'url': video_url,
                'ext': video_ext,
-                'title': video_title,
+                'title': chapter_info['title'],
+                'thumbnail': chapter_info['preview'],
+                'description': chapter_info['description'],
            }
            return [info]
        else: