InstagramIE: fix the extraction of the uploader_id and the title

The page title is now just 'Instagram', so we build the title ourselves from the uploader id ('Video by <uploader_id>'). Also extract the description.
2024-11-27 10:52:34 +00:00 · 2013-07-18 13:12:27 +02:00 · 2013-07-18 13:12:27 +02:00 · 3f40217704
commit 3f40217704
parent f631c3311a
1 changed files with 10 additions and 12 deletions
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
        u'md5': u'0d2da106a9d2631273e192b372806516',
        u'info_dict': {
            u"uploader_id": u"naomipq", 
-            u"title": u"Video by naomipq"
+            u"title": u"Video by naomipq",
            u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
        }
    }
@ -18,20 +19,17 @@ def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)
-        html_title = self._html_search_regex(
+        uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
-            r'<title>(.+?)</title>',
+            webpage, u'uploader id', fatal=False)
-            webpage, u'title', flags=re.DOTALL)
+        desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
-        title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
+            fatal=False)
        uploader_id = self._html_search_regex(
            r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
            webpage, u'uploader id', fatal=False, flags=re.DOTALL)
        ext = 'mp4'
        return [{
            'id':        video_id,
            'url':       self._og_search_video_url(webpage),
-            'ext':       ext,
+            'ext':       'mp4',
-            'title':     title,
+            'title':     u'Video by %s' % uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader_id' : uploader_id
+            'uploader_id' : uploader_id,
            'description': desc,
        }]