From 89c0dc9a5fadc3927f7c03f5829e4f2ef8555888 Mon Sep 17 00:00:00 2001 From: BlahGeek Date: Sat, 30 Apr 2016 21:32:54 +0800 Subject: [PATCH] [xiami] Add xiami extractor --- youtube_dl/extractor/extractors.py | 6 ++ youtube_dl/extractor/xiami.py | 161 +++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 youtube_dl/extractor/xiami.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1b7f9b428..14ca9eaeee 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -941,6 +941,12 @@ XHamsterIE, XHamsterEmbedIE, ) +from .xiami import ( + XiamiIE, + XiamiAlbumIE, + XiamiArtistIE, + XiamiCollectionIE +) from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py new file mode 100644 index 0000000000..a28d63c488 --- /dev/null +++ b/youtube_dl/extractor/xiami.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + xpath_element, + xpath_text, + xpath_with_ns, + int_or_none, + ExtractorError +) +from ..compat import compat_urllib_parse_unquote + + +class XiamiBaseIE(InfoExtractor): + + _XML_BASE_URL = 'http://www.xiami.com/song/playlist/id' + _NS_MAP = {'xm': 'http://xspf.org/ns/0/'} + + def _extract_track(self, track): + artist = xpath_text(track, xpath_with_ns('xm:artist', self._NS_MAP), default='') + artist = artist.split(';') + + ret = { + 'id': xpath_text(track, xpath_with_ns('xm:song_id', self._NS_MAP)), + 'title': xpath_text(track, xpath_with_ns('xm:title', self._NS_MAP)), + 'album': xpath_text(track, xpath_with_ns('xm:album_name', self._NS_MAP)), + 'artist': ';'.join(artist) if artist else None, + 'creator': artist[0] if artist else None, + 'url': self._decrypt(xpath_text(track, xpath_with_ns('xm:location', self._NS_MAP))), + 'thumbnail': xpath_text(track, xpath_with_ns('xm:pic', self._NS_MAP), default=None), + 'duration': int_or_none(xpath_text(track, xpath_with_ns('xm:length', self._NS_MAP))), + } + + lyrics_url = xpath_text(track, xpath_with_ns('xm:lyric', self._NS_MAP)) + if lyrics_url and lyrics_url.endswith('.lrc'): + ret['description'] = self._download_webpage(lyrics_url, ret['id']) + return ret + + def _extract_xml(self, _id, typ=''): + playlist = self._download_xml('%s/%s%s' % (self._XML_BASE_URL, _id, typ), _id) + tracklist = xpath_element(playlist, xpath_with_ns('./xm:trackList', self._NS_MAP)) + + if not len(tracklist): + raise ExtractorError('No track found') + return [self._extract_track(track) for track in tracklist] + + @staticmethod + def _decrypt(origin): + n = int(origin[0]) + origin = origin[1:] + short_lenth = len(origin) // n + long_num = len(origin) - short_lenth * n + l = tuple() + for i in range(0, n): + length = short_lenth + if i < long_num: + length += 1 + l += (origin[0:length], ) + origin = origin[length:] + ans = '' + for i in range(0, short_lenth + 1): + for j in range(0, n): + if len(l[j])>i: + ans += l[j][i] + return compat_urllib_parse_unquote(ans).replace('^', '0') + + +class XiamiIE(XiamiBaseIE): + IE_NAME = 'xiami:song' + IE_DESC = '虾米音乐' + _VALID_URL = r'http://www\.xiami\.com/song/(?P[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.xiami.com/song/1775610518', + 'md5': '521dd6bea40fd5c9c69f913c232cb57e', + 'info_dict': { + 'id': '1775610518', + 'ext': 'mp3', + 'title': 'Woman', + 'creator': 'HONNE', + 'album': 'Woman', + 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', + 'description': 'md5:052ec7de41ca19f67e7fd70a1bfc4e0b', + } + }, + { + 'url': 'http://www.xiami.com/song/1775256504', + 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', + 'info_dict': { + 'id': '1775256504', + 'ext': 'mp3', + 'title': '悟空', + 'creator': '戴荃', + 'album': '悟空', + 'description': 'md5:206e67e84f9bed1d473d04196a00b990', + } + }, + ] + + def _real_extract(self, url): + _id = self._match_id(url) + return self._extract_xml(_id)[0] + + +class XiamiAlbumIE(XiamiBaseIE): + IE_NAME = 'xiami:album' + IE_DESC = '虾米音乐 - 专辑' + _VALID_URL = r'http://www\.xiami\.com/album/(?P[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.xiami.com/album/2100300444', + 'info_dict': { + 'id': '2100300444', + }, + 'playlist_count': 10, + }, + { + 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + _id = self._match_id(url) + return self.playlist_result(self._extract_xml(_id, '/type/1'), _id) + + +class XiamiArtistIE(XiamiBaseIE): + IE_NAME = 'xiami:artist' + IE_DESC = '虾米音乐 - 歌手' + _VALID_URL = r'http://www\.xiami\.com/artist/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', + 'info_dict': { + 'id': '2132', + }, + 'playlist_count': 20, + } + + def _real_extract(self, url): + _id = self._match_id(url) + return self.playlist_result(self._extract_xml(_id, '/type/2'), _id) + + +class XiamiCollectionIE(XiamiBaseIE): + IE_NAME = 'xiami:collection' + IE_DESC = '虾米音乐 - 精选集' + _VALID_URL = r'http://www\.xiami\.com/collect/(?P[0-9]+)' + _TEST = { + 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', + 'info_dict': { + 'id': '156527391', + }, + 'playlist_count': 26, + } + + def _real_extract(self, url): + _id = self._match_id(url) + return self.playlist_result(self._extract_xml(_id, '/type/3'), _id)