mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-27 10:52:34 +00:00
Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
b3013681ff
1
AUTHORS
1
AUTHORS
@ -96,3 +96,4 @@ Mathias Rav
|
|||||||
Petr Kutalek
|
Petr Kutalek
|
||||||
Will Glynn
|
Will Glynn
|
||||||
Max Reimann
|
Max Reimann
|
||||||
|
Cédric Luthi
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
ArteTVDDCIE,
|
ArteTVDDCIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
)
|
)
|
||||||
|
from .atresplayer import AtresPlayerIE
|
||||||
from .audiomack import AudiomackIE
|
from .audiomack import AudiomackIE
|
||||||
from .auengine import AUEngineIE
|
from .auengine import AUEngineIE
|
||||||
from .azubu import AzubuIE
|
from .azubu import AzubuIE
|
||||||
@ -169,8 +170,10 @@
|
|||||||
from .groupon import GrouponIE
|
from .groupon import GrouponIE
|
||||||
from .hark import HarkIE
|
from .hark import HarkIE
|
||||||
from .heise import HeiseIE
|
from .heise import HeiseIE
|
||||||
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
from .hentaistigma import HentaiStigmaIE
|
||||||
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hornbunny import HornBunnyIE
|
from .hornbunny import HornBunnyIE
|
||||||
from .hostingbulk import HostingBulkIE
|
from .hostingbulk import HostingBulkIE
|
||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
@ -515,6 +518,7 @@
|
|||||||
from .xnxx import XNXXIE
|
from .xnxx import XNXXIE
|
||||||
from .xvideos import XVideosIE
|
from .xvideos import XVideosIE
|
||||||
from .xtube import XTubeUserIE, XTubeIE
|
from .xtube import XTubeUserIE, XTubeIE
|
||||||
|
from .xxxymovies import XXXYMoviesIE
|
||||||
from .yahoo import (
|
from .yahoo import (
|
||||||
YahooIE,
|
YahooIE,
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
|
114
youtube_dl/extractor/atresplayer.py
Normal file
114
youtube_dl/extractor/atresplayer.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import time
|
||||||
|
import hmac
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_request,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
xpath_text,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AtresPlayerIE(InfoExtractor):
    """Information extractor for episode pages on atresplayer.com.

    The extractor reads the episode id from the page, signs it together with
    a shifted timestamp, and queries the video-URL API once per device
    profile to collect the available formats. Metadata (title, description,
    duration, thumbnail) is read from a separate episode XML document.
    """

    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
    _TESTS = [
        {
            'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
            'md5': 'efd56753cda1bb64df52a3074f62e38a',
            'info_dict': {
                'id': 'capitulo-10-especial-solidario-nochebuena',
                'ext': 'mp4',
                'title': 'Especial Solidario de Nochebuena',
                'description': 'md5:e2d52ff12214fa937107d21064075bf1',
                'duration': 5527.6,
                # Raw string: '\.' is not a valid escape in a plain literal.
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
            'only_matching': True,
        },
    ]

    # Sent with every video-URL API request (see _real_extract).
    _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
    # HMAC key used to sign "<episode id><shifted timestamp>".
    _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
    # Added to the timestamp before it is signed and sent.
    _TIMESTAMP_SHIFT = 30000

    _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
    _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
    _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
    _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'

    def _real_extract(self, url):
        """Extract formats and metadata for the episode at ``url``.

        Returns an info dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration`` and ``formats``.

        Raises ExtractorError (expected) when the video-URL API does not
        report an ``ok`` result.
        """
        # Function-scope import: hashlib is not among this module's
        # top-level imports.
        import hashlib

        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        episode_id = self._search_regex(
            r'episode="([^"]+)"', webpage, 'episode id')

        time_page = self._download_webpage(
            self._TIME_API_URL,
            video_id, 'Downloading timestamp', fatal=False)
        # Normalise a falsy (failed, non-fatal) download to None so that the
        # local-clock fallback is actually used -- assumes int_or_none only
        # substitutes its default for None; TODO confirm against utils.
        # The fallback is truncated to an int so that the value embedded in
        # the signed message and in the URL never contains a fractional part.
        timestamp = int_or_none(time_page or None, 1000, int(time.time()))
        timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
        # digestmod is passed explicitly: it is mandatory on Python 3.8+,
        # and MD5 is what older interpreters used when it was omitted.
        token = hmac.new(
            self._MAGIC.encode('ascii'),
            (episode_id + timestamp_shifted).encode('utf-8'),
            hashlib.md5
        ).hexdigest()

        formats = []
        for fmt in ['windows', 'android_tablet']:
            request = compat_urllib_request.Request(
                self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
            request.add_header('Youtubedl-user-agent', self._USER_AGENT)

            fmt_json = self._download_json(
                request, video_id, 'Downloading %s video JSON' % fmt)

            result = fmt_json.get('resultDes')
            # A missing 'resultDes' is treated as an error instead of
            # crashing on None.lower().
            if not result or result.lower() != 'ok':
                raise ExtractorError(
                    '%s returned error: %s' % (self.IE_NAME, result), expected=True)

            for video_url in fmt_json['resultObject'].values():
                if video_url.endswith('/Manifest'):
                    # Drop the 9-character '/Manifest' suffix and request
                    # the f4m manifest instead.
                    formats.extend(self._extract_f4m_formats(video_url[:-9] + '/manifest.f4m', video_id))
                else:
                    formats.append({
                        'url': video_url,
                        'format_id': 'android',
                        'preference': 1,
                    })
        self._sort_formats(formats)

        player = self._download_json(
            self._PLAYER_URL_TEMPLATE % episode_id,
            episode_id)

        path_data = player.get('pathData')

        episode = self._download_xml(
            self._EPISODE_URL_TEMPLATE % path_data,
            video_id, 'Downloading episode XML')

        duration = float_or_none(xpath_text(
            episode, './media/asset/info/technical/contentDuration', 'duration'))

        art = episode.find('./media/asset/info/art')
        title = xpath_text(art, './name', 'title')
        description = xpath_text(art, './description', 'description')
        thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|
@ -12,7 +12,7 @@
|
|||||||
|
|
||||||
class CNNIE(InfoExtractor):
|
class CNNIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn(?:-ap)?|(?=&)))'''
|
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||||
@ -35,6 +35,16 @@ class CNNIE(InfoExtractor):
|
|||||||
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"",
|
||||||
"upload_date": "20130821",
|
"upload_date": "20130821",
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html',
|
||||||
|
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Nashville Ep. 1: Hand crafted skateboards',
|
||||||
|
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||||
|
'upload_date': '20141222',
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -38,7 +38,7 @@ def _real_extract(self, url):
|
|||||||
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
|
||||||
webpage = self._download_webpage(canonical_url, video_id)
|
webpage = self._download_webpage(canonical_url, video_id)
|
||||||
full_id = self._search_regex(
|
full_id = self._search_regex(
|
||||||
r'<iframe src="http://videofarm.daum.net/controller/video/viewer/Video.html\?.*?vid=(.+?)[&"]',
|
r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
|
||||||
webpage, 'full id')
|
webpage, 'full id')
|
||||||
query = compat_urllib_parse.urlencode({'vid': full_id})
|
query = compat_urllib_parse.urlencode({'vid': full_id})
|
||||||
info = self._download_xml(
|
info = self._download_xml(
|
||||||
|
71
youtube_dl/extractor/hellporno.py
Normal file
71
youtube_dl/extractor/hellporno.py
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
remove_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HellPornoIE(InfoExtractor):
    """Information extractor for hellporno.com video pages.

    All stream information is read from the ``flashvars`` JavaScript object
    embedded in the page.
    """

    _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
        'md5': '1fee339c610d2049699ef2aa699439f1',
        'info_dict': {
            'id': '149116',
            'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
            'ext': 'mp4',
            'title': 'Dixie is posing with naked ass very erotic',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Extract formats and metadata for the video page at ``url``.

        Returns an info dict with ``id``, ``display_id``, ``title``,
        ``thumbnail``, ``categories``, ``age_limit`` and ``formats``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = remove_end(self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')

        flashvars = self._parse_json(self._search_regex(
            r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
            display_id, transform_source=js_to_json)

        video_id = flashvars.get('video_id')
        thumbnail = flashvars.get('preview_url')
        # 'postfix' carries a leading dot (default '.mp4'); strip it.
        ext = flashvars.get('postfix', '.mp4')[1:]

        formats = []
        for video_url_key in ['video_url', 'video_alt_url']:
            video_url = flashvars.get(video_url_key)
            if not video_url:
                continue
            video_text = flashvars.get('%s_text' % video_url_key)
            fmt = {
                'url': video_url,
                'ext': ext,
                'format_id': video_text,
            }
            # The label is optional; only try to parse a height out of it
            # when it is present, so a missing '<key>_text' entry no longer
            # makes re.search() fail on None.
            if video_text:
                m = re.search(r'^(?P<height>\d+)[pP]', video_text)
                if m:
                    fmt['height'] = int(m.group('height'))
            formats.append(fmt)
        self._sort_formats(formats)

        categories = self._html_search_meta(
            'keywords', webpage, 'categories', default='').split(',')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'categories': categories,
            'age_limit': 18,
            'formats': formats,
        }
|
166
youtube_dl/extractor/hitbox.py
Normal file
166
youtube_dl/extractor/hitbox.py
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
parse_iso8601,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HitboxIE(InfoExtractor):
    """Information extractor for recorded videos on hitbox.tv."""

    IE_NAME = 'hitbox'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/video/203213',
        'info_dict': {
            'id': '203213',
            'title': 'hitbox @ gamescom, Sub Button Hype extended, Giveaway - hitbox News Update with Oxy',
            'alt_title': 'hitboxlive - Aug 9th #6',
            'description': '',
            'ext': 'mp4',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 215.1666,
            'resolution': 'HD 720p',
            'uploader': 'hitboxlive',
            'view_count': int,
            'timestamp': 1407576133,
            'upload_date': '20140809',
            'categories': ['Live Show'],
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _extract_metadata(self, url, video_id):
        """Download and normalise the media metadata for ``video_id``.

        ``url`` is the API endpoint prefix; the JSON document is fetched
        from ``'<url>/<video_id>'``.

        Returns a partial info dict (no ``url``/``formats``) that callers
        complete with stream information.
        """
        thumb_base = 'https://edge.sf.hitbox.tv'
        metadata = self._download_json(
            '%s/%s' % (url, video_id), video_id)

        # The payload key and the date field depend on the media type;
        # anything that is not explicitly a 'video' is read as a livestream.
        date = 'media_live_since'
        media_type = 'livestream'
        if metadata.get('media_type') == 'video':
            media_type = 'video'
            date = 'media_date_added'

        video_meta = metadata.get(media_type, [])[0]
        title = video_meta.get('media_status')
        alt_title = video_meta.get('media_title')
        description = clean_html(
            video_meta.get('media_description') or
            video_meta.get('media_description_md'))
        duration = float_or_none(video_meta.get('media_duration'))
        uploader = video_meta.get('media_user_name')
        views = int_or_none(video_meta.get('media_views'))
        timestamp = parse_iso8601(video_meta.get(date), ' ')

        # Report a category only when the API provides one, rather than
        # a list holding None.
        category = video_meta.get('category_name')
        categories = [category] if category else None

        # Only build thumbnail entries for paths that are present, so a
        # missing key cannot raise on "str + None".
        thumbs = []
        for key, width, height in (
                ('media_thumbnail', 320, 180),
                ('media_thumbnail_large', 768, 432)):
            thumb_path = video_meta.get(key)
            if not thumb_path:
                continue
            thumbs.append({
                'url': thumb_base + thumb_path,
                'width': width,
                'height': height,
            })

        return {
            'id': video_id,
            'title': title,
            'alt_title': alt_title,
            'description': description,
            'ext': 'mp4',
            'thumbnails': thumbs,
            'duration': duration,
            'uploader': uploader,
            'view_count': views,
            'timestamp': timestamp,
            'categories': categories,
        }

    def _real_extract(self, url):
        """Extract metadata and the stream URL for the video at ``url``."""
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/video',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
            video_id)

        clip = player_config.get('clip')
        video_url = clip.get('url')
        # The resolution label is optional: tolerate an absent or empty
        # 'bitrates' list instead of failing on [][0].
        bitrates = clip.get('bitrates') or []
        res = bitrates[0].get('label') if bitrates else None

        metadata['resolution'] = res
        metadata['url'] = video_url
        metadata['protocol'] = 'm3u8'

        return metadata
|
||||||
|
|
||||||
|
|
||||||
|
class HitboxLiveIE(HitboxIE):
    """Information extractor for live channels on hitbox.tv."""

    IE_NAME = 'hitbox:live'
    _VALID_URL = r'https?://(?:www\.)?hitbox\.tv/(?!video)(?P<id>.+)'
    _TEST = {
        'url': 'http://www.hitbox.tv/dimak',
        'info_dict': {
            'id': 'dimak',
            'ext': 'mp4',
            'description': 'md5:c9f80fa4410bc588d7faa40003fc7d0e',
            'timestamp': int,
            'upload_date': compat_str,
            'title': compat_str,
            'uploader': 'Dimak',
        },
        'params': {
            # live
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract metadata and one format per CDN stream for a live channel.

        Returns the metadata dict produced by ``_extract_metadata`` with
        ``formats``, ``is_live`` and a live-decorated ``title`` added.
        """
        video_id = self._match_id(url)

        metadata = self._extract_metadata(
            'https://www.hitbox.tv/api/media/live',
            video_id)

        player_config = self._download_json(
            'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
            video_id)

        formats = []
        # Tolerate a missing 'cdns' entry instead of iterating over None.
        cdns = player_config.get('cdns') or []
        # Base URLs already handled; each server contributes formats once.
        servers = set()
        for cdn in cdns:
            base_url = cdn.get('netConnectionUrl')
            if not base_url or base_url in servers:
                continue
            servers.add(base_url)
            # Take the last two dot-separated labels of the host part as a
            # short note for the format; leave it unset if the URL does not
            # have that shape.
            host_match = re.search(r'.+\.([^\.]+\.[^\./]+)/.+', base_url)
            host = host_match.group(1) if host_match else None
            for stream in cdn.get('bitrates') or []:
                label = stream.get('label')
                if label == 'Auto':
                    continue
                formats.append({
                    'url': '%s/%s' % (base_url, stream.get('url')),
                    'ext': 'mp4',
                    'vbr': stream.get('bitrate'),
                    'resolution': label,
                    'rtmp_live': True,
                    'format_note': host,
                    'page_url': url,
                    'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
                })

        self._sort_formats(formats)
        metadata['formats'] = formats
        metadata['is_live'] = True
        metadata['title'] = self._live_title(metadata.get('title'))
        return metadata
|
81
youtube_dl/extractor/xxxymovies.py
Normal file
81
youtube_dl/extractor/xxxymovies.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class XXXYMoviesIE(InfoExtractor):
    """Information extractor for xxxymovies.com video pages.

    Every field is scraped from the video page itself with regular
    expressions; only the video URL and the title are mandatory.
    """

    _VALID_URL = r'https?://(?:www\.)?xxxymovies\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'http://xxxymovies.com/videos/138669/ecstatic-orgasm-sofcore/',
        'md5': '810b1bdbbffff89dd13bdb369fe7be4b',
        'info_dict': {
            'id': '138669',
            'display_id': 'ecstatic-orgasm-sofcore',
            'ext': 'mp4',
            'title': 'Ecstatic Orgasm Sofcore',
            'duration': 931,
            'categories': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Scrape the page at ``url`` and return its info dict."""
        video_id, display_id = re.match(
            self._VALID_URL, url).group('id', 'display_id')

        page = self._download_webpage(url, display_id)

        # The result is filled in field by field, in the same order the
        # page is queried.
        info = {
            'id': video_id,
            'display_id': display_id,
        }

        # Mandatory fields: a failed match aborts the extraction.
        info['url'] = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", page, 'video URL')

        title_patterns = [
            r'<div class="block_header">\s*<h1>([^<]+)</h1>',
            r'<title>(.*?)\s*-\s*XXXYMovies\.com</title>',
        ]
        info['title'] = self._html_search_regex(
            title_patterns, page, 'title')

        # Optional fields: a failed match is not fatal.
        info['thumbnail'] = self._search_regex(
            r"preview_url\s*:\s*'([^']+)'",
            page, 'thumbnail', fatal=False)

        keywords = self._html_search_meta(
            'keywords', page, 'categories', default='')
        info['categories'] = keywords.split(',')

        raw_duration = self._search_regex(
            r'<span>Duration:</span>\s*(\d+:\d+)',
            page, 'duration', fatal=False)
        info['duration'] = parse_duration(raw_duration)

        raw_views = self._html_search_regex(
            r'<div class="video_views">\s*(\d+)',
            page, 'view count', fatal=False)
        info['view_count'] = int_or_none(raw_views)

        raw_likes = self._search_regex(
            r'>\s*Likes? <b>\((\d+)\)',
            page, 'like count', fatal=False)
        info['like_count'] = int_or_none(raw_likes)

        raw_dislikes = self._search_regex(
            r'>\s*Dislike <b>\((\d+)\)</b>',
            page, 'dislike count', fatal=False)
        info['dislike_count'] = int_or_none(raw_dislikes)

        info['age_limit'] = self._rta_search(page)

        return info
|
@ -418,6 +418,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
|||||||
'upload_date': '20140605',
|
'upload_date': '20140605',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# Age-gate video with encrypted signature
|
||||||
|
{
|
||||||
|
'url': 'http://www.youtube.com/watch?v=6kLq3WMV1nU',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6kLq3WMV1nU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
|
||||||
|
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
|
||||||
|
'uploader': 'LloydVEVO',
|
||||||
|
'uploader_id': 'LloydVEVO',
|
||||||
|
'upload_date': '20110629',
|
||||||
|
},
|
||||||
|
},
|
||||||
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
# video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
|
||||||
{
|
{
|
||||||
'url': '__2ABJjxzNo',
|
'url': '__2ABJjxzNo',
|
||||||
@ -766,11 +779,13 @@ def _real_extract(self, url):
|
|||||||
age_gate = True
|
age_gate = True
|
||||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||||
# this can be viewed without login into Youtube
|
# this can be viewed without login into Youtube
|
||||||
|
url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||||
|
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
|
||||||
data = compat_urllib_parse.urlencode({
|
data = compat_urllib_parse.urlencode({
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||||
'sts': self._search_regex(
|
'sts': self._search_regex(
|
||||||
r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''),
|
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
||||||
})
|
})
|
||||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||||
video_info_webpage = self._download_webpage(
|
video_info_webpage = self._download_webpage(
|
||||||
@ -968,10 +983,9 @@ def _map_to_format_list(urlmap):
|
|||||||
elif 's' in url_data:
|
elif 's' in url_data:
|
||||||
encrypted_sig = url_data['s'][0]
|
encrypted_sig = url_data['s'][0]
|
||||||
|
|
||||||
if not age_gate:
|
|
||||||
jsplayer_url_json = self._search_regex(
|
jsplayer_url_json = self._search_regex(
|
||||||
r'"assets":.+?"js":\s*("[^"]+")',
|
r'"assets":.+?"js":\s*("[^"]+")',
|
||||||
video_webpage, 'JS player URL')
|
embed_webpage if age_gate else video_webpage, 'JS player URL')
|
||||||
player_url = json.loads(jsplayer_url_json)
|
player_url = json.loads(jsplayer_url_json)
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
player_url_json = self._search_regex(
|
player_url_json = self._search_regex(
|
||||||
|
Loading…
Reference in New Issue
Block a user