diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 9c62191b5..1095a26e2 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -9,6 +9,7 @@ get_element_by_id, clean_html, ExtractorError, + remove_start, ) @@ -73,10 +74,10 @@ def _real_extract(self, url): errnote='Unable to get song detail info') song_name = self._html_search_regex( - r'
.+?]+class="album"[^<]+]+href="http://www\.kuwo\.cn/album/(\d+)/"',
webpage, 'album id', default=None, fatal=False)
publish_time = None
@@ -131,15 +132,16 @@ def _real_extract(self, url):
errnote='Unable to get album info')
album_name = self._html_search_regex(
- r' ]+class="listen">]+href="http://www\.kuwo\.cn/yinyue/(\d+)/"',
webpage)
]
return self.playlist_result(entries, album_id, album_name, album_intro)
@@ -147,7 +149,7 @@ def _real_extract(self, url):
class KuwoChartIE(InfoExtractor):
IE_NAME = 'kuwo:chart'
- _VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P ([0-9]{4}第[0-9]{2}期) ]+class="tabDef">(\d{4}第\d{2}期)]+title="([^"]+)"', webpage,
+ 'album name')
+ album_intro = remove_start(
+ clean_html(get_element_by_id("intro", webpage)),
+ '%s简介:' % album_name)
entries = [
self.url_result("http://www.kuwo.cn/yinyue/%s/" % song_id, 'Kuwo', song_id)
for song_id in re.findall(
- r'
',
+ r'(.+?)
', webpage, 'chart name')
+ r']+class="unDis">([^<]+)
', webpage, 'chart name')
chart_desc = self._html_search_regex(
- r'(.+?)\s*
([^<]+)]+href="http://www\.kuwo\.cn/yinyue/([0-9]+)/',
webpage)
][:10 if first_page_only else None])
- if first_page_only or not re.search(r'下一页', webpage):
+ if first_page_only or not re.search(r']+href="[^"]+">下一页', webpage):
break
return self.playlist_result(entries, singer_id, singer_name)
@@ -248,13 +250,14 @@ def _real_extract(self, url):
errnote='Unable to get category info')
category_name = self._html_search_regex(
- r'
[^<>]+?
', webpage, 'category name')
+ r']+title="([^<>]+?)">[^<>]+?
', webpage, 'category name')
- category_desc = re.sub(
- r'^.+简介:', '', get_element_by_id("intro", webpage).strip())
+ category_desc = remove_start(
+ get_element_by_id("intro", webpage).strip(),
+ '%s简介:' % category_name)
jsonm = self._parse_json(self._html_search_regex(
- r'var jsonm = (\{.+?\});', webpage, 'category songs'), category_id)
+ r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id)
entries = [
self.url_result(
@@ -289,7 +292,7 @@ def _real_extract(self, url):
errnote='Unable to get mv detail info: %s' % song_id)
mobj = re.search(
- r'[^<>]+.+?)".+?>[^<>]+
',
+ r']+title="(?P