From 6bb608d055e3dd3d73dcc010f945158153274238 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Fri, 18 Feb 2022 02:15:29 +0900 Subject: [PATCH] [piapro] Add extractor (#2801) Based on https://github.com/ytdl-org/youtube-dl/pull/25922 Closes #2710, https://github.com/ytdl-org/youtube-dl/issues/5856 Authored by: pycabbage, Lesmiscore --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/piapro.py | 100 +++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 yt_dlp/extractor/piapro.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 923bf6de9..15bc74915 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1162,6 +1162,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .piapro import PiaproIE from .picarto import ( PicartoIE, PicartoVodIE, diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py new file mode 100644 index 000000000..497e1edbc --- /dev/null +++ b/yt_dlp/extractor/piapro.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + ExtractorError, + parse_duration, + parse_filesize, + str_to_int, + unified_timestamp, + urlencode_postdata, +) + + +class PiaproIE(InfoExtractor): + _NETRC_MACHINE = 'piapro' + _VALID_URL = r'https?://piapro\.jp/t/(?P\w+)/?' + _TESTS = [{ + 'url': 'https://piapro.jp/t/NXYR', + 'md5': 'a9d52f27d13bafab7ee34116a7dcfa77', + 'info_dict': { + 'id': 'NXYR', + 'ext': 'mp3', + 'uploader': 'wowaka', + 'uploader_id': 'wowaka', + 'title': '裏表ラバーズ', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + + def _real_initialize(self): + self._login_status = self._login() + + def _login(self): + username, password = self._get_login_info() + if not username: + return False + login_ok = True + login_form_strs = { + '_username': username, + '_password': password, + '_remember_me': 'on', + 'login': 'ログイン' + } + self._request_webpage('https://piapro.jp/login/', None) + urlh = self._request_webpage( + 'https://piapro.jp/login/exe', None, + note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form_strs)) + if urlh is False: + login_ok = False + else: + parts = compat_urlparse.urlparse(urlh.geturl()) + if parts.path != '/': + login_ok = False + if not login_ok: + self.report_warning( + 'unable to log in: bad username or password') + return login_ok + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID') + if category_id not in ('1', '2', '21', '22', '23', '24', '25'): + raise ExtractorError('The URL does not contain audio.', expected=True) + + str_duration, str_filesize = self._search_regex( + r'サイズ:(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size', + group=(1, 2), default=(None, None)) + str_viewcount = self._search_regex(r'閲覧数:([0-9,]+)\s+', webpage, 'view count', fatal=False) + + uploader_id, uploader = self._search_regex( + r'([^<]+)さん<', webpage, 'uploader', + group=(1, 2), default=(None, None)) + content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') + create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') + + player_webpage = self._download_webpage( + f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', + video_id, note='Downloading player webpage') + + return { + 'id': video_id, + 'title': self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False), + 'description': self._html_search_regex(r'(.+?)

\s*