From d34e79492d1d9d9babf85af3737b90c4fe55eb42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 16:54:11 +0600
Subject: [PATCH 01/16] [twitch] Fix live streams (Closes #5158)

---
 youtube_dl/extractor/twitch.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index b058891bd..cbdaf9c7a 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -358,13 +358,12 @@ def _real_extract(self, url):
             'p': random.randint(1000000, 10000000),
             'player': 'twitchweb',
             'segment_preference': '4',
-            'sig': access_token['sig'],
-            'token': access_token['token'],
+            'sig': access_token['sig'].encode('utf-8'),
+            'token': access_token['token'].encode('utf-8'),
         }
-
         formats = self._extract_m3u8_formats(
             '%s/api/channel/hls/%s.m3u8?%s'
-            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+            % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
             channel_id, 'mp4')
         self._prefer_source(formats)
 

From 1132eae56d0f693b5dc55529d5cfadf32b32700d Mon Sep 17 00:00:00 2001
From: Naglis Jonaitis <njonaitis@gmail.com>
Date: Sun, 8 Mar 2015 13:54:01 +0200
Subject: [PATCH 02/16] [gazeta] Add new extractor (Closes #4222)

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/gazeta.py   | 35 ++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)
 create mode 100644 youtube_dl/extractor/gazeta.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index b489d5770..e2475a634 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -175,6 +175,7 @@
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
 from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .generic import GenericIE
 from .giantbomb import GiantBombIE
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
new file mode 100644
index 000000000..3f9e58061
--- /dev/null
+++ b/youtube_dl/extractor/gazeta.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:(?P<category>[^/]*)/)?video/(?:main/)?(?P<id>[A-Za-z0-9-_]+)\.s?html)'
+    _TEST = {
+        'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+        'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+        'info_dict': {
+            'id': '205566',
+            'ext': 'mp4',
+            'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+            'description': 'md5:38617526050bd17b234728e7f9620a71',
+            'thumbnail': 're:^https?://.*\.jpg',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        display_id = mobj.group('id')
+        embed_url = '%s?p=embed' % mobj.group('url')
+        embed_page = self._download_webpage(
+            embed_url, display_id, 'Downloading embed page')
+
+        video_id = self._search_regex(
+            r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+        return self.url_result(
+            'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')

From 28778d6bae2ff2a0ad57d2c5a694e22ac4ead749 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 18:03:12 +0600
Subject: [PATCH 03/16] [pladform] Add extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/pladform.py | 86 ++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 youtube_dl/extractor/pladform.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e2475a634..14172ca56 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -364,6 +364,7 @@
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
 from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
 from .playvid import PlayvidIE
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
new file mode 100644
index 000000000..926e368a2
--- /dev/null
+++ b/youtube_dl/extractor/pladform.py
@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    xpath_text,
+)
+
+
+class PladformIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:
+                                out\.pladform\.ru/player|
+                                static\.pladform\.ru/player\.swf
+                            )
+                            \?.*\bvideoid=|
+                            video\.pladform\.ru/catalog/video/videoid/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        # http://muz-tv.ru/kinozal/view/7400/
+        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+        'info_dict': {
+            'id': '100183293',
+            'ext': 'mp4',
+            'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 694,
+            'age_limit': 0,
+        },
+    }, {
+        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+        'only_matching': True,
+    }, {
+        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_xml(
+            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+            video_id)
+
+        if video.tag == 'error':
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, video.text),
+                expected=True)
+
+        formats = [{
+            'url': src.text,
+            'format_id': src.get('quality'),
+        } for src in video.findall('./src')]
+        self._sort_formats(formats)
+
+        webpage = self._download_webpage(
+            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+            video_id)
+
+        title = self._og_search_title(webpage, fatal=False) or xpath_text(
+            video, './/title', 'title', fatal=True)
+        description = self._search_regex(
+            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+            video, './/cover', 'cover')
+
+        duration = int_or_none(xpath_text(video, './/time', 'duration'))
+        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'age_limit': age_limit,
+            'formats': formats,
+        }

From f8388757269de8a5e87b9a507db55021b3090980 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 18:07:10 +0600
Subject: [PATCH 04/16] [pladform] Add support for embeds

---
 youtube_dl/extractor/generic.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 013198b0d..4e6927b08 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -596,6 +596,19 @@ class GenericIE(InfoExtractor):
                 'view_count': int,
             },
         },
+        # Pladform embed
+        {
+            'url': 'http://muz-tv.ru/kinozal/view/7400/',
+            'info_dict': {
+                'id': '100183293',
+                'ext': 'mp4',
+                'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+                'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'duration': 694,
+                'age_limit': 0,
+            },
+        },
         # RSS feed with enclosure
         {
             'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -1193,6 +1206,12 @@ def _playlist_from_matches(matches, getter=None, ie=None):
         if mobj is not None:
             return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
 
+        # Look for Pladform embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'Pladform')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True

From 11101076a1b0eb36dd56b1b5aa1b7559f2d230b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 18:09:47 +0600
Subject: [PATCH 05/16] [pladform] Fix format quality sorting

---
 youtube_dl/extractor/pladform.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
index 926e368a2..abde34b94 100644
--- a/youtube_dl/extractor/pladform.py
+++ b/youtube_dl/extractor/pladform.py
@@ -6,6 +6,7 @@
     ExtractorError,
     int_or_none,
     xpath_text,
+    qualities,
 )
 
 
@@ -55,9 +56,12 @@ def _real_extract(self, url):
                 '%s returned error: %s' % (self.IE_NAME, video.text),
                 expected=True)
 
+        quality = qualities(('ld', 'sd', 'hd'))
+
         formats = [{
             'url': src.text,
             'format_id': src.get('quality'),
+            'quality': quality(src.get('quality')),
         } for src in video.findall('./src')]
         self._sort_formats(formats)
 

From 24993e3b396407af9ccdf8dd893588df31aff8af Mon Sep 17 00:00:00 2001
From: Naglis Jonaitis <njonaitis@gmail.com>
Date: Sun, 8 Mar 2015 14:08:45 +0200
Subject: [PATCH 06/16] [vidme] Fix view_count extraction and remove
 comment_count extraction (Fixes #5133)

Comment counts seem to no longer be listed on vid.me
---
 youtube_dl/extractor/vidme.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index 339c3d897..bd953fb4c 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -41,13 +41,10 @@ def _real_extract(self, url):
         duration = float_or_none(self._html_search_regex(
             r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
         view_count = str_to_int(self._html_search_regex(
-            r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+            r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
         like_count = str_to_int(self._html_search_regex(
             r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
             webpage, 'like count', fatal=False))
-        comment_count = str_to_int(self._html_search_regex(
-            r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
-            webpage, 'comment count', fatal=False))
 
         return {
             'id': video_id,
@@ -61,5 +58,4 @@ def _real_extract(self, url):
             'duration': duration,
             'view_count': view_count,
             'like_count': like_count,
-            'comment_count': comment_count,
         }

From 8b910bda0cdba859c1971a410aa5d2a51c6e4918 Mon Sep 17 00:00:00 2001
From: Naglis Jonaitis <njonaitis@gmail.com>
Date: Sun, 8 Mar 2015 14:28:53 +0200
Subject: [PATCH 07/16] [teamcoco] Fix extraction

---
 youtube_dl/extractor/teamcoco.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 5793dbc10..7cb06f351 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -53,10 +53,10 @@ def _real_extract(self, url):
         embed = self._download_webpage(
             embed_url, video_id, 'Downloading embed page')
 
-        encoded_data = self._search_regex(
-            r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+        player_data = self._parse_json(self._search_regex(
+            r'Y\.Ginger\.Module\.Player\((\{.*?\})\);', embed, 'player data'), video_id)
         data = self._parse_json(
-            base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
+            base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
 
         formats = []
         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])

From bdf6eee0aeed8df586569982eaaac04eecc0062d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 19:17:54 +0600
Subject: [PATCH 08/16] [gazeta] Extend _VALID_URL

---
 youtube_dl/extractor/gazeta.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
index 3f9e58061..ea32b621c 100644
--- a/youtube_dl/extractor/gazeta.py
+++ b/youtube_dl/extractor/gazeta.py
@@ -7,8 +7,8 @@
 
 
 class GazetaIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:(?P<category>[^/]*)/)?video/(?:main/)?(?P<id>[A-Za-z0-9-_]+)\.s?html)'
-    _TEST = {
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+    _TESTS = [{
         'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
         'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
         'info_dict': {
@@ -18,7 +18,10 @@ class GazetaIE(InfoExtractor):
             'description': 'md5:38617526050bd17b234728e7f9620a71',
             'thumbnail': 're:^https?://.*\.jpg',
         },
-    }
+    }, {
+        'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)

From a2aaf4dbc6e5f5d345329b5a845111851453b6a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 20:55:22 +0600
Subject: [PATCH 09/16] [utils] Add sanitize_path

---
 test/test_utils.py  | 21 +++++++++++++++++++++
 youtube_dl/utils.py | 18 ++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/test/test_utils.py b/test/test_utils.py
index 64fad58ad..5ebb8d498 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -38,6 +38,7 @@
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
+    sanitize_path,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -131,6 +132,26 @@ def test_sanitize_ids(self):
         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
         self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
 
+    def test_sanitize_path(self):
+        if sys.platform != 'win32':
+            return
+
+        self.assertEqual(sanitize_path('abc'), 'abc')
+        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7426e2a1f..0f49d602e 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -311,6 +311,24 @@ def replace_insane(char):
     return result
 
 
+def sanitize_path(s):
+    """Sanitizes and normalizes path on Windows"""
+    if sys.platform != 'win32':
+        return s
+    drive, _ = os.path.splitdrive(s)
+    unc, _ = os.path.splitunc(s)
+    unc_or_drive = unc or drive
+    norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
+    if unc_or_drive:
+        norm_path.pop(0)
+    sanitized_path = [
+        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+        for path_part in norm_path]
+    if unc_or_drive:
+        sanitized_path.insert(0, unc_or_drive + os.path.sep)
+    return os.path.join(*sanitized_path)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []

From d55de57b67bceca5d9116ddcc2ada2fa1957d89d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 20:56:28 +0600
Subject: [PATCH 10/16] [utils] Fix sanitize_open

---
 youtube_dl/utils.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 0f49d602e..e511232ca 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
             raise
 
         # In case of error, try to remove win32 forbidden chars
-        alt_filename = os.path.join(
-            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
-            for path_part in os.path.split(filename)
-        )
+        alt_filename = sanitize_path(filename)
         if alt_filename == filename:
             raise
         else:
             # An exception here should be caught in the caller
-            stream = open(encodeFilename(filename), open_mode)
+            stream = open(encodeFilename(alt_filename), open_mode)
             return (stream, alt_filename)
 
 

From 1bb5c511a551ba7c5d3179d2d3a124f0977e74b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 20:57:30 +0600
Subject: [PATCH 11/16] [YoutubeDL] Sanitize outtmpl as path

---
 youtube_dl/YoutubeDL.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index bae52e9c7..4b9151163 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -61,6 +61,7 @@
     render_table,
     SameFileError,
     sanitize_filename,
+    sanitize_path,
     std_headers,
     subtitles_filename,
     takewhile_inclusive,
@@ -553,20 +554,16 @@ def prepare_filename(self, info_dict):
                 elif template_dict.get('width'):
                     template_dict['resolution'] = '?x%d' % template_dict['width']
 
-            restrict_filenames = self.params.get('restrictfilenames')
-
             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
-                restricted=restrict_filenames,
+                restricted=self.params.get('restrictfilenames'),
                 is_id=(k == 'id'))
             template_dict = dict((k, sanitize(k, v))
                                  for k, v in template_dict.items()
                                  if v is not None)
             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 
-            outtmpl = sanitize_filename(
-                self.params.get('outtmpl', DEFAULT_OUTTMPL),
-                restricted=restrict_filenames)
+            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
             tmpl = compat_expanduser(outtmpl)
             filename = tmpl % template_dict
             # Temporary fix for #4787

From f18ef2d14463a13d80e967d1b18ece6a076f60fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 22:08:48 +0600
Subject: [PATCH 12/16] [utils] Disallow trailing dot in sanitize_path for a
 path part

---
 test/test_utils.py  | 11 +++++++++++
 youtube_dl/utils.py |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/test/test_utils.py b/test/test_utils.py
index 5ebb8d498..28bda654e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -152,6 +152,17 @@ def test_sanitize_path(self):
         self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
         self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
 
+        self.assertEqual(
+            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+        self.assertEqual(
+            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e511232ca..d5597d514 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -319,7 +319,7 @@ def sanitize_path(s):
     if unc_or_drive:
         norm_path.pop(0)
     sanitized_path = [
-        re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+        re.sub('(?:[/<>:"\\|\\\\?\\*]|\.$)', '#', path_part)
         for path_part in norm_path]
     if unc_or_drive:
         sanitized_path.insert(0, unc_or_drive + os.path.sep)

From e5a11a2293069b1acfa5eb5d2694ad4082dd9755 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Sun, 8 Mar 2015 22:09:42 +0600
Subject: [PATCH 13/16] [YoutubeDL] Sanitize path before creating non-existent
 paths (Closes #4324)

---
 youtube_dl/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 4b9151163..bce7587fd 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1262,7 +1262,7 @@ def process_info(self, info_dict):
             return
 
         try:
-            dn = os.path.dirname(encodeFilename(filename))
+            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
             if dn and not os.path.exists(dn):
                 os.makedirs(dn)
         except (OSError, IOError) as err:

From 43d6280d0a03335ec5143383f15e2ca9a49f4046 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 8 Mar 2015 18:25:11 +0100
Subject: [PATCH 14/16] [downloader/f4m] Fix use of base64 in python 3.2 (fixes
 #5132)

b64decode needs a byte string, but on 3.4 it also accepts strings.
---
 youtube_dl/downloader/f4m.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 3dc796faa..4ab000d67 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -281,7 +281,7 @@ def _parse_bootstrap_node(self, node, base_url):
             boot_info = self._get_bootstrap_from_url(bootstrap_url)
         else:
             bootstrap_url = None
-            bootstrap = base64.b64decode(node.text)
+            bootstrap = base64.b64decode(node.text.encode('ascii'))
             boot_info = read_bootstrap_info(bootstrap)
         return (boot_info, bootstrap_url)
 
@@ -308,7 +308,7 @@ def real_download(self, filename, info_dict):
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
-            metadata = base64.b64decode(metadata_node.text)
+            metadata = base64.b64decode(metadata_node.text.encode('ascii'))
         else:
             metadata = None
 

From cc08b11d16efd125d765cc1a69ac795a592f012c Mon Sep 17 00:00:00 2001
From: Naglis Jonaitis <njonaitis@gmail.com>
Date: Sun, 8 Mar 2015 21:32:42 +0200
Subject: [PATCH 15/16] [adultswim] Improve video_info extraction (Fixes #5152)

Look for video_info inside `slugged_video`, if slug is not found among collections.
Also, simplify a bit.
---
 youtube_dl/extractor/adultswim.py | 44 ++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 34b8b0115..39335b827 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -2,13 +2,12 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    xpath_text,
     float_or_none,
+    xpath_text,
 )
 
 
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
             'title': 'American Dad - Putting Francine Out of Business',
             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
         },
+    }, {
+        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+        'playlist': [
+            {
+                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+                'info_dict': {
+                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+                    'ext': 'flv',
+                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+        },
     }]
 
     @staticmethod
@@ -80,6 +97,7 @@ def find_collection_containing_video(collections, slug):
             for video in collection.get('videos'):
                 if video.get('slug') == slug:
                     return collection, video
+        return None, None
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ def _real_extract(self, url):
         webpage = self._download_webpage(url, episode_path)
 
         # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
-        try:
-            bootstrappedData = json.loads(bootstrappedDataJS)
-        except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % episode_path
-            raise ExtractorError(errmsg, cause=ve)
+        bootstrapped_data = self._parse_json(self._search_regex(
+            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
 
         # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
         # NOTE: We are only downloading one video (the current one) not the playlist
         if is_playlist:
-            collections = bootstrappedData['playlists']['collections']
+            collections = bootstrapped_data['playlists']['collections']
             collection = self.find_collection_by_linkURL(collections, show_path)
             video_info = self.find_video_info(collection, episode_path)
 
             show_title = video_info['showTitle']
             segment_ids = [video_info['videoPlaybackID']]
         else:
-            collections = bootstrappedData['show']['collections']
+            collections = bootstrapped_data['show']['collections']
             collection, video_info = self.find_collection_containing_video(collections, episode_path)
 
-            show = bootstrappedData['show']
+            # Video wasn't found in the collections, let's try `slugged_video`.
+            if video_info is None:
+                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+                    video_info = bootstrapped_data['slugged_video']
+                else:
+                    raise ExtractorError('Unable to find video info')
+
+            show = bootstrapped_data['show']
             show_title = show['title']
             segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
 

From dd7831fe94a0fb8270e7fa3699677c7476a5cd83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Mon, 9 Mar 2015 04:55:35 +0600
Subject: [PATCH 16/16] [breakcom] Process only play purpose media formats
 (Closes #5164)

---
 youtube_dl/extractor/breakcom.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py
index 4bcc897c9..809287d14 100644
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -41,7 +41,7 @@ def _real_extract(self, url):
             'tbr': media['bitRate'],
             'width': media['width'],
             'height': media['height'],
-        } for media in info['media']]
+        } for media in info['media'] if media.get('mediaPurpose') == 'play']
 
         if not formats:
             formats.append({