From 7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 2 Sep 2022 01:28:56 +0530 Subject: [PATCH] [extractor/youtube] Detect `lazy-load-for-videos` embeds Closes #4812 --- yt_dlp/extractor/youtube.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 2748b5dc5..4a5d6805e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -923,19 +923,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:\#|$)""" % { 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), } - _EMBED_REGEX = [r'''(?x) - (?: - ]+?src=| - data-video-url=| - ]+?src=| - embedSWF\(?:\s*| - ]+data=| - new\s+SWFObject\( - ) - (["\']) - (?P(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) - \1'''] + _EMBED_REGEX = [ + r'''(?x) + (?: + ]+?src=| + data-video-url=| + ]+?src=| + embedSWF\(?:\s*| + ]+data=| + new\s+SWFObject\( + ) + (["\']) + (?P(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ + (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) + \1''', + # https://wordpress.org/plugins/lazy-load-for-videos/ + r'''(?xs) + ]*\bhref="(?Phttps://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})" + \s[^>]*\bclass="[^"]*\blazy-load-youtube''', + ] + _PLAYER_INFO_RE = ( r'/s/player/(?P[a-zA-Z0-9_-]{8,})/player', r'/(?P[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',