[telegram] Fix metadata extraction

Closes #3528
This commit is contained in:
pukkandan 2022-04-23 22:15:38 +05:30
parent b0f636beb4
commit 90f4229409
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39

View File

@ -1,4 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import clean_html, get_element_by_class
class TelegramEmbedIE(InfoExtractor): class TelegramEmbedIE(InfoExtractor):
@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id, query={'embed': 0})
webpage_embed = self._download_webpage(f'{url}?embed=1', video_id) webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading embed page')
formats = [{ formats = [{
'url': self._proto_relative_url(self._search_regex( 'url': self._proto_relative_url(self._search_regex(
@ -29,9 +30,12 @@ def _real_extract(self, url):
return { return {
'id': video_id, 'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True), 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True), 'description': self._html_search_meta(
'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', ['og:description', 'twitter:description'], webpage,
webpage_embed, 'thumbnail'), default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))),
'thumbnail': self._search_regex(
r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
webpage_embed, 'thumbnail'),
'formats': formats, 'formats': formats,
} }