[pornhub] Improve and simplify (closes #19135)

This commit is contained in:
Sergey M․ 2019-02-05 23:06:55 +07:00
parent d2d970d07e
commit 5dda1edef9
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -302,17 +302,12 @@ def parse_js_value(inp):
comment_count = self._extract_count( comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
def _get_items(class_name): def extract_list(meta_key):
div = self._search_regex( div = self._search_regex(
r'<div class="' + class_name + '">([\S\s]+?)</div>', r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
webpage, class_name, default=None) % meta_key, webpage, meta_key, default=None)
if div: if div:
return [a for a in re.findall(r'<a href=[^>]+>([^<]+)', div)] return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
else:
return None
categories = _get_items('categoriesWrapper')
tags = _get_items('tagsWrapper')
return { return {
'id': video_id, 'id': video_id,
@@ -327,8 +322,8 @@ def _get_items(class_name):
'comment_count': comment_count, 'comment_count': comment_count,
'formats': formats, 'formats': formats,
'age_limit': 18, 'age_limit': 18,
'tags': tags, 'tags': extract_list('tags'),
'categories': categories, 'categories': extract_list('categories'),
'subtitles': subtitles, 'subtitles': subtitles,
} }