From 3fd4c2a543934501253dc5ffaab30ed3b89282a0 Mon Sep 17 00:00:00 2001
From: felix
Date: Sun, 23 May 2021 18:34:53 +0200
Subject: [PATCH] [mediasite] Extract slides (#343)

Fixes:
https://github.com/ytdl-org/youtube-dl/issues/4974#issue-58006762
https://github.com/ytdl-org/youtube-dl/issues/4540#issuecomment-69574231
https://github.com/ytdl-org/youtube-dl/pull/11185#issuecomment-335554239

Authored by: fstirlitz
---
 yt_dlp/extractor/mediasite.py | 59 ++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py
index 5d083a1cd..c62233ab7 100644
--- a/yt_dlp/extractor/mediasite.py
+++ b/yt_dlp/extractor/mediasite.py
@@ -122,6 +122,52 @@ def _extract_urls(webpage):
                 r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
                 webpage)]
 
+    def __extract_slides(self, *, stream_id, snum, Stream, duration, images):
+        slide_base_url = Stream['SlideBaseUrl']
+
+        fname_template = Stream['SlideImageFileNameTemplate']
+        if fname_template != 'slide_{0:D4}.jpg':
+            self.report_warning('Unusual slide file name template; report a bug if slide downloading fails')
+        fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template)
+
+        fragments = []
+        for i, slide in enumerate(Stream['Slides']):
+            if i == 0:
+                if slide['Time'] > 0:
+                    default_slide = images.get('DefaultSlide')
+                    if default_slide is None:
+                        default_slide = images.get('DefaultStreamImage')
+                    if default_slide is not None:
+                        default_slide = default_slide['ImageFilename']
+                    if default_slide is not None:
+                        fragments.append({
+                            'path': default_slide,
+                            'duration': slide['Time'] / 1000,
+                        })
+
+            next_time = try_get(None, [
+                lambda _: Stream['Slides'][i + 1]['Time'],
+                lambda _: duration,
+                lambda _: slide['Time'],
+            ], expected_type=(int, float))
+
+            fragments.append({
+                'path': fname_template.format(slide.get('Number', i + 1)),
+                'duration': (next_time - slide['Time']) / 1000
+            })
+
+        return {
+            'format_id': '%s-%u.slides' % (stream_id, snum),
+            'ext': 'mhtml',
+            'url': slide_base_url,
+            'protocol': 'mhtml',
+            'acodec': 'none',
+            'vcodec': 'none',
+            'format_note': 'Slides',
+            'fragments': fragments,
+            'fragment_base_url': slide_base_url,
+        }
+
     def _real_extract(self, url):
         url, data = unsmuggle_url(url, {})
         mobj = re.match(self._VALID_URL, url)
@@ -198,10 +244,15 @@ def _real_extract(self, url):
                         'ext': mimetype2ext(VideoUrl.get('MimeType')),
                     })
 
-            # TODO: if Stream['HasSlideContent']:
-            # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
-            # from Stream['Slides']
-            # this will require writing a custom downloader...
+            if Stream.get('HasSlideContent', False):
+                images = player_options['PlayerLayoutOptions']['Images']
+                stream_formats.append(self.__extract_slides(
+                    stream_id=stream_id,
+                    snum=snum,
+                    Stream=Stream,
+                    duration=presentation.get('Duration'),
+                    images=images,
+                ))
 
             # disprefer 'secondary' streams
             if stream_type != 0: