From c816336cbdb91efa282c0ede8552157861f10e76 Mon Sep 17 00:00:00 2001
From: net
Date: Mon, 29 Sep 2014 21:58:42 +0300
Subject: [PATCH] [karaoketv] Add new extractor

---
 youtube_dl/extractor/__init__.py  |  1 +
 youtube_dl/extractor/karaoketv.py | 56 +++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 youtube_dl/extractor/karaoketv.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 0792215673..d12e741a0b 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -173,6 +173,7 @@
 from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .kankan import KankanIE
+from .karaoketv import KaraoketvIE
 from .keezmovies import KeezMoviesIE
 from .khanacademy import KhanAcademyIE
 from .kickstarter import KickStarterIE
diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py
new file mode 100644
index 0000000000..4d50308cc4
--- /dev/null
+++ b/youtube_dl/extractor/karaoketv.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+import sys
+
+from .common import InfoExtractor
+from ..utils import compat_urllib_parse, ExtractorError
+
+
+class KaraoketvIE(InfoExtractor):
+    """Extractor for song pages on karaoketv.co.il."""
+
+    _VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://karaoketv.co.il/?container=songs&id=171568',
+        'info_dict': {
+            'id': '171568',
+            'ext': 'mp4',
+            'title': 'אל העולם שלך - רותם כהן - שרים קריוקי',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the id, title and media URL for a song page."""
+        # Known issue on Python 2:
+        # SSL23_GET_SERVER_HELLO:unknown protocol
+        if sys.version_info < (3,):
+            raise ExtractorError("Only python 3 supported.\n")
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The text after "config=" in the og:video URL is the URL-encoded
+        # address of the settings document fetched below.
+        og_video_url = self._og_search_video_url(webpage, video_id)
+        settings_url = compat_urllib_parse.unquote_plus(self._search_regex(
+            r'config=(.*)', og_video_url, 'settings url'))
+
+        # Pass video_id second, as in the page download above, so the
+        # progress note is not taken for the id.
+        settings_page = self._download_webpage(
+            settings_url, video_id, 'Downloading settings json')
+
+        # Single quotes are swapped for double quotes before parsing.
+        # NOTE(review): presumably the response is a single-quoted JS object;
+        # this breaks if a value contains an apostrophe.
+        settings = json.loads(settings_page.replace('\'', '"'))
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'url': settings['playlist'][0]['url'],
+        }