From f9355dc989362f31d8e21ccb8fa765546a2360f2 Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Thu, 28 May 2015 17:00:09 +0800
Subject: [PATCH 01/13] [youku] update youku

---
 youtube_dl/extractor/youku.py | 248 ++++++++++++++++++++++------------
 1 file changed, 162 insertions(+), 86 deletions(-)
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 97b98bbe8..8d86c3f45 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -1,123 +1,199 @@
 # coding: utf-8
-
 from __future__ import unicode_literals
 
-import math
-import random
 import re
-import time
+import base64
 
 from .common import InfoExtractor
-from ..utils import (
-    ExtractorError,
-)
-
+from ..utils import ExtractorError
 
 class YoukuIE(InfoExtractor):
+    IE_NAME = 'youku'
     _VALID_URL = r'''(?x)
         (?:
             http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
             youku:)
         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
     '''
+
     _TEST = {
-        'url': 'http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html',
-        'md5': 'ffe3f2e435663dc2d1eea34faeff5b5b',
-        'params': {
-            'test': False
-        },
-        'info_dict': {
-            'id': 'XNDgyMDQ2NTQw_part00',
-            'ext': 'flv',
-            'title': 'youtube-dl test video "\'/\\ä↭𝕐'
-        }
+            'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
+            'md5': '5f3af4192eabacc4501508d54a8cabd7',
+            'info_dict': {
+                'id': 'XMTc1ODE5Njcy',
+                'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
+                'ext': 'flv'
+            }
     }
 
-    def _gen_sid(self):
-        nowTime = int(time.time() * 1000)
-        random1 = random.randint(1000, 1998)
-        random2 = random.randint(1000, 9999)
+    def construct_video_urls(self, data1, data2):
+        # get sid, token
+        def yk_t(s1, s2):
+            ls = list(range(256))
+            t = 0
+            for i in range(256):
+                t = (t + ls[i] + ord(s1[i%len(s1)])) % 256
+                ls[i], ls[t] = ls[t], ls[i]
+            s, x, y = '', 0, 0
+            for i in range(len(s2)):
+                y = (y + 1) % 256
+                x = (x + ls[y]) % 256
+                ls[x], ls[y] = ls[y], ls[x]
+                s += chr((s2[i] ^ ls[(ls[x]+ls[y]) % 256]))
+            return s
 
-        return "%d%d%d" % (nowTime, random1, random2)
+        sid, token = yk_t(
+            'becaf9be', base64.b64decode(bytes(data2['ep'], 'ascii'))
+        ).split('_')
 
-    def _get_file_ID_mix_string(self, seed):
-        mixed = []
-        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
-        seed = float(seed)
-        for i in range(len(source)):
-            seed = (seed * 211 + 30031) % 65536
-            index = math.floor(seed / 65536 * len(source))
-            mixed.append(source[int(index)])
-            source.remove(source[int(index)])
-        # return ''.join(mixed)
-        return mixed
+        # get oip
+        oip = data2['ip']
 
-    def _get_file_id(self, fileId, seed):
-        mixed = self._get_file_ID_mix_string(seed)
-        ids = fileId.split('*')
-        realId = []
-        for ch in ids:
-            if ch:
-                realId.append(mixed[int(ch)])
-        return ''.join(realId)
+        # get fileid
+        string_ls = list(
+            'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890')
+        shuffled_string_ls = []
+        seed = data1['seed']
+        N = len(string_ls)
+        for ii in range(N):
+            seed = (seed * 0xd3 + 0x754f) % 0x10000
+            idx = seed * len(string_ls) // 0x10000
+            shuffled_string_ls.append(string_ls[idx])
+            del string_ls[idx]
+
+        fileid_dict = {}
+        for format in data1['streamtypes']:
+            streamfileid = [
+                int(i) for i in data1['streamfileids'][format].strip('*').split('*')]
+            fileid = ''.join(
+                [shuffled_string_ls[i] for i in streamfileid])
+            fileid_dict[format] = fileid[:8] + '%s' + fileid[10:]
+
+        def get_fileid(format, n):
+            fileid = fileid_dict[format] % hex(int(n))[2:].upper().zfill(2)
+            return fileid
+
+        # get ep
+        def generate_ep(format, n):
+            fileid = get_fileid(format, n)
+            ep_t = yk_t(
+                'bf7e5f01',
+                bytes('%s_%s_%s' % (sid, fileid, token), 'ascii'))
+            ep = base64.b64encode(bytes(ep_t, 'latin')).decode()
+            ep = ep.replace('+', '%2B')
+            ep = ep.replace('/', '%2F')
+            ep = ep.replace('=', '%2D')
+            return ep
+
+        # generate video_urls
+        video_urls_dict = {}
+        for format in data1['streamtypes']:
+            video_urls = []
+            for dt in data1['segs'][format]:
+                n = str(int(dt['no']))
+                video_url = \
+                    'http://k.youku.com/player/getFlvPath/' + \
+                    'sid/' + sid + \
+                    '_' + str(int(n)+1).zfill(2) + \
+                    '/st/' + self.parse_ext_l(format) + \
+                    '/fileid/' + get_fileid(format, n)  + '?' + \
+                    'K=' + str(dt['k']) + \
+                    '&hd=' + self.get_hd(format) + \
+                    '&myp=0' + \
+                    '&ts=' + str(dt['seconds']) + \
+                    '&ypp=0&ctype=12&ev=1' + \
+                    '&token=' + str(token) + \
+                    '&oip=' + str(oip) + \
+                    '&ep=' + generate_ep(format, n)
+                video_urls.append(video_url)
+            video_urls_dict[format] = video_urls
+
+        return video_urls_dict
+
+    def get_hd(self, fm):
+        hd_id_dict = {
+            'flv': '0',
+            'mp4': '1',
+            'hd2': '2',
+            'hd3': '3',
+            '3gp': '0',
+            '3gphd': '1'
+        }
+        return hd_id_dict[fm]
+
+    def parse_ext_l(self, fm):
+        ext_dict = {
+            'flv': 'flv',
+            'mp4': 'mp4',
+            'hd2': 'flv',
+            'hd3': 'flv',
+            '3gp': 'flv',
+            '3gphd': 'mp4',
+        }
+        return ext_dict[fm]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
 
-        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
+        # request basic data
+        data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
+        data2_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
 
-        config = self._download_json(info_url, video_id)
+        raw_data1 = self._download_json(data1_url, video_id)
+        raw_data2 = self._download_json(data2_url, video_id)
+        data1 = raw_data1['data'][0]
+        data2 = raw_data2['data'][0]
 
-        error_code = config['data'][0].get('error_code')
+        error_code = data1.get('error_code')
         if error_code:
             # -8 means blocked outside China.
-            error = config['data'][0].get('error')  # Chinese and English, separated by newline.
-            raise ExtractorError(error or 'Server reported error %i' % error_code,
-                                 expected=True)
+            # Chinese and English, separated by newline.
+            error = data1.get('error')
+            raise ExtractorError(
+                error or 'Server reported error %i' %
+                error_code,
+                expected=True)
 
-        video_title = config['data'][0]['title']
-        seed = config['data'][0]['seed']
+        title = data1['title']
 
-        format = self._downloader.params.get('format', None)
-        supported_format = list(config['data'][0]['streamfileids'].keys())
+        # generate video_urls_dict
+        video_urls_dict = self.construct_video_urls(data1, data2)
 
-        # TODO proper format selection
-        if format is None or format == 'best':
-            if 'hd2' in supported_format:
-                format = 'hd2'
-            else:
-                format = 'flv'
-            ext = 'flv'
-        elif format == 'worst':
-            format = 'mp4'
-            ext = 'mp4'
-        else:
-            format = 'flv'
-            ext = 'flv'
+        # construct info
+        entries = []
+        for fm in data1['streamtypes']:
+            #formats = []
+            video_urls = video_urls_dict[fm]
+            for i in range(len(video_urls)):
+                if len(entries) < i+1:
+                    entries.append({'formats': []})
+                entries[i]['formats'].append(
+                    {
+                        'url': video_urls[i],
+                        'format_id': fm,
+                        'ext': self.parse_ext_l(fm),
+                        'filesize': int(data1['segs'][fm][i]['size'])
+                    }
+                )
 
-        fileid = config['data'][0]['streamfileids'][format]
-        keys = [s['k'] for s in config['data'][0]['segs'][format]]
-        # segs is usually a dictionary, but an empty *list* if an error occured.
-
-        files_info = []
-        sid = self._gen_sid()
-        fileid = self._get_file_id(fileid, seed)
-
-        # column 8,9 of fileid represent the segment number
-        # fileid[7:9] should be changed
-        for index, key in enumerate(keys):
-            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
-            download_url = 'http://k.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
+        for i in range(len(entries)):
+            entries[i].update(
+                {
+                    'id': '_part%d' % (i+1),
+                    'title': title,
+                }
+            )
 
+        if len(entries) > 1:
             info = {
-                'id': '%s_part%02d' % (video_id, index),
-                'url': download_url,
-                'uploader': None,
-                'upload_date': None,
-                'title': video_title,
-                'ext': ext,
+                '_type': 'multi_video',
+                'id': video_id,
+                'title': title,
+                'entries': entries,
             }
-            files_info.append(info)
+        else:
+            info = entries[0]
+            info['id'] = video_id
 
-        return files_info
+        return info

From ca45246627f5a67a7c82cd40a11e5c4ff5f68871 Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Thu, 28 May 2015 21:04:58 +0800
Subject: [PATCH 02/13] [youku] make compatible with Python 2.7 and Python 3.3+

---
 youtube_dl/extractor/youku.py | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 8d86c3f45..7a07c8a5f 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import sys
+pyvs = sys.version_info[0]
 import re
 import base64
 
@@ -34,16 +36,23 @@ def yk_t(s1, s2):
             for i in range(256):
                 t = (t + ls[i] + ord(s1[i%len(s1)])) % 256
                 ls[i], ls[t] = ls[t], ls[i]
-            s, x, y = '', 0, 0
+            s = '' if pyvs == 3 else b''
+            x, y = 0, 0
             for i in range(len(s2)):
                 y = (y + 1) % 256
                 x = (x + ls[y]) % 256
                 ls[x], ls[y] = ls[y], ls[x]
-                s += chr((s2[i] ^ ls[(ls[x]+ls[y]) % 256]))
+                if isinstance(s2[i], int):
+                    s += chr(s2[i] ^ ls[(ls[x]+ls[y]) % 256])
+                else:
+                    s += chr(ord(s2[i]) ^ ls[(ls[x]+ls[y]) % 256])
             return s
 
         sid, token = yk_t(
-            'becaf9be', base64.b64decode(bytes(data2['ep'], 'ascii'))
+            'becaf9be',
+            base64.b64decode(bytes(data2['ep'], 'ascii')) \
+                if pyvs == 3 \
+                else base64.b64decode(data2['ep'])
         ).split('_')
 
         # get oip
@@ -78,8 +87,15 @@ def generate_ep(format, n):
             fileid = get_fileid(format, n)
             ep_t = yk_t(
                 'bf7e5f01',
-                bytes('%s_%s_%s' % (sid, fileid, token), 'ascii'))
-            ep = base64.b64encode(bytes(ep_t, 'latin')).decode()
+                bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') \
+                if pyvs == 3 \
+                else ('%s_%s_%s' % (sid, fileid, token))
+            )
+            ep = base64.b64encode(
+                bytes(ep_t, 'latin') \
+                if pyvs == 3 \
+                else ep_t
+            ).decode()
             ep = ep.replace('+', '%2B')
             ep = ep.replace('/', '%2F')
             ep = ep.replace('=', '%2D')

From 1498940b10a3f43490c05045ebe7a517267a2bff Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Fri, 29 May 2015 10:13:09 +0800
Subject: [PATCH 03/13] [youku] compare bytes and str for compatibility; use
 compat_urllib_parse to build video_url

---
 youtube_dl/extractor/youku.py | 38 +++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 7a07c8a5f..063f2e10e 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -1,14 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import sys
-pyvs = sys.version_info[0]
 import re
 import base64
 
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
+from ..compat import compat_urllib_parse
+
+bytes_is_str = (bytes == str)  # for compatible
+
 class YoukuIE(InfoExtractor):
     IE_NAME = 'youku'
     _VALID_URL = r'''(?x)
@@ -36,7 +38,7 @@ def yk_t(s1, s2):
             for i in range(256):
                 t = (t + ls[i] + ord(s1[i%len(s1)])) % 256
                 ls[i], ls[t] = ls[t], ls[i]
-            s = '' if pyvs == 3 else b''
+            s = '' if not bytes_is_str else b''
             x, y = 0, 0
             for i in range(len(s2)):
                 y = (y + 1) % 256
@@ -51,7 +53,7 @@ def yk_t(s1, s2):
         sid, token = yk_t(
             'becaf9be',
             base64.b64decode(bytes(data2['ep'], 'ascii')) \
-                if pyvs == 3 \
+                if not bytes_is_str \
                 else base64.b64decode(data2['ep'])
         ).split('_')
 
@@ -88,17 +90,14 @@ def generate_ep(format, n):
             ep_t = yk_t(
                 'bf7e5f01',
                 bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') \
-                if pyvs == 3 \
+                if not bytes_is_str \
                 else ('%s_%s_%s' % (sid, fileid, token))
             )
             ep = base64.b64encode(
                 bytes(ep_t, 'latin') \
-                if pyvs == 3 \
+                if not bytes_is_str \
                 else ep_t
             ).decode()
-            ep = ep.replace('+', '%2B')
-            ep = ep.replace('/', '%2F')
-            ep = ep.replace('=', '%2D')
             return ep
 
         # generate video_urls
@@ -107,20 +106,25 @@ def generate_ep(format, n):
             video_urls = []
             for dt in data1['segs'][format]:
                 n = str(int(dt['no']))
+                param = {
+                    'K': dt['k'],
+                    'hd': self.get_hd(format),
+                    'myp': 0,
+                    'ts': dt['seconds'],
+                    'ypp': 0,
+                    'ctype': 12,
+                    'ev': 1,
+                    'token': token,
+                    'oip': oip,
+                    'ep': generate_ep(format, n)
+                }
                 video_url = \
                     'http://k.youku.com/player/getFlvPath/' + \
                     'sid/' + sid + \
                     '_' + str(int(n)+1).zfill(2) + \
                     '/st/' + self.parse_ext_l(format) + \
                     '/fileid/' + get_fileid(format, n)  + '?' + \
-                    'K=' + str(dt['k']) + \
-                    '&hd=' + self.get_hd(format) + \
-                    '&myp=0' + \
-                    '&ts=' + str(dt['seconds']) + \
-                    '&ypp=0&ctype=12&ev=1' + \
-                    '&token=' + str(token) + \
-                    '&oip=' + str(oip) + \
-                    '&ep=' + generate_ep(format, n)
+                    compat_urllib_parse.urlencode(param)
                 video_urls.append(video_url)
             video_urls_dict[format] = video_urls
 

From 08f7db20c16743a2bd3040eb7dac11d675011eef Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Sat, 30 May 2015 10:03:32 +0800
Subject: [PATCH 04/13] [youku] change format_id

---
 youtube_dl/extractor/youku.py | 37 +++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 063f2e10e..aed6b960a 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -132,26 +132,37 @@ def generate_ep(format, n):
 
     def get_hd(self, fm):
         hd_id_dict = {
-            'flv': '0',
-            'mp4': '1',
-            'hd2': '2',
-            'hd3': '3',
-            '3gp': '0',
-            '3gphd': '1'
+            'flv'   : '0',
+            'mp4'   : '1',
+            'hd2'   : '2',
+            'hd3'   : '3',
+            '3gp'   : '0',
+            '3gphd' : '1'
         }
         return hd_id_dict[fm]
 
     def parse_ext_l(self, fm):
         ext_dict = {
-            'flv': 'flv',
-            'mp4': 'mp4',
-            'hd2': 'flv',
-            'hd3': 'flv',
-            '3gp': 'flv',
-            '3gphd': 'mp4',
+            'flv'   : 'flv',
+            'mp4'   : 'mp4',
+            'hd2'   : 'flv',
+            'hd3'   : 'flv',
+            '3gp'   : 'flv',
+            '3gphd' : 'mp4'
         }
         return ext_dict[fm]
 
+    def get_format_name(self, fm):
+        _dict = {
+            '3gp'   : 'h6',
+            '3gphd' : 'h5',
+            'flv'   : 'h4',
+            'mp4'   : 'h3',
+            'hd2'   : 'h2',
+            'hd3'   : 'h1'
+        }
+        return _dict[fm]
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
@@ -191,7 +202,7 @@ def _real_extract(self, url):
                 entries[i]['formats'].append(
                     {
                         'url': video_urls[i],
-                        'format_id': fm,
+                        'format_id': self.get_format_name(fm),
                         'ext': self.parse_ext_l(fm),
                         'filesize': int(data1['segs'][fm][i]['size'])
                     }

From aed473ccf9d9da73b1b80ee8b06d00ee66a3769d Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 15 Jun 2015 22:41:24 +0800
Subject: [PATCH 05/13] [youku] PEP8

---
 youtube_dl/extractor/youku.py | 80 +++++++++++++++++------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index aed6b960a..4e47fca8a 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -11,6 +11,7 @@
 
 bytes_is_str = (bytes == str)  # for compatible
 
+
 class YoukuIE(InfoExtractor):
     IE_NAME = 'youku'
     _VALID_URL = r'''(?x)
@@ -21,13 +22,13 @@ class YoukuIE(InfoExtractor):
     '''
 
     _TEST = {
-            'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
-            'md5': '5f3af4192eabacc4501508d54a8cabd7',
-            'info_dict': {
-                'id': 'XMTc1ODE5Njcy',
-                'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
-                'ext': 'flv'
-            }
+        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
+        'md5': '5f3af4192eabacc4501508d54a8cabd7',
+        'info_dict': {
+            'id': 'XMTc1ODE5Njcy',
+            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
+            'ext': 'flv'
+        }
     }
 
     def construct_video_urls(self, data1, data2):
@@ -36,7 +37,7 @@ def yk_t(s1, s2):
             ls = list(range(256))
             t = 0
             for i in range(256):
-                t = (t + ls[i] + ord(s1[i%len(s1)])) % 256
+                t = (t + ls[i] + ord(s1[i % len(s1)])) % 256
                 ls[i], ls[t] = ls[t], ls[i]
             s = '' if not bytes_is_str else b''
             x, y = 0, 0
@@ -45,16 +46,16 @@ def yk_t(s1, s2):
                 x = (x + ls[y]) % 256
                 ls[x], ls[y] = ls[y], ls[x]
                 if isinstance(s2[i], int):
-                    s += chr(s2[i] ^ ls[(ls[x]+ls[y]) % 256])
+                    s += chr(s2[i] ^ ls[(ls[x] + ls[y]) % 256])
                 else:
-                    s += chr(ord(s2[i]) ^ ls[(ls[x]+ls[y]) % 256])
+                    s += chr(ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
             return s
 
         sid, token = yk_t(
             'becaf9be',
-            base64.b64decode(bytes(data2['ep'], 'ascii')) \
-                if not bytes_is_str \
-                else base64.b64decode(data2['ep'])
+            base64.b64decode(bytes(data2['ep'], 'ascii'))
+            if not bytes_is_str
+            else base64.b64decode(data2['ep'])
         ).split('_')
 
         # get oip
@@ -89,13 +90,13 @@ def generate_ep(format, n):
             fileid = get_fileid(format, n)
             ep_t = yk_t(
                 'bf7e5f01',
-                bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') \
-                if not bytes_is_str \
+                bytes('%s_%s_%s' % (sid, fileid, token), 'ascii')
+                if not bytes_is_str
                 else ('%s_%s_%s' % (sid, fileid, token))
             )
             ep = base64.b64encode(
-                bytes(ep_t, 'latin') \
-                if not bytes_is_str \
+                bytes(ep_t, 'latin')
+                if not bytes_is_str
                 else ep_t
             ).decode()
             return ep
@@ -121,9 +122,9 @@ def generate_ep(format, n):
                 video_url = \
                     'http://k.youku.com/player/getFlvPath/' + \
                     'sid/' + sid + \
-                    '_' + str(int(n)+1).zfill(2) + \
+                    '_' + str(int(n) + 1).zfill(2) + \
                     '/st/' + self.parse_ext_l(format) + \
-                    '/fileid/' + get_fileid(format, n)  + '?' + \
+                    '/fileid/' + get_fileid(format, n) + '?' + \
                     compat_urllib_parse.urlencode(param)
                 video_urls.append(video_url)
             video_urls_dict[format] = video_urls
@@ -132,34 +133,34 @@ def generate_ep(format, n):
 
     def get_hd(self, fm):
         hd_id_dict = {
-            'flv'   : '0',
-            'mp4'   : '1',
-            'hd2'   : '2',
-            'hd3'   : '3',
-            '3gp'   : '0',
-            '3gphd' : '1'
+            'flv': '0',
+            'mp4': '1',
+            'hd2': '2',
+            'hd3': '3',
+            '3gp': '0',
+            '3gphd': '1'
         }
         return hd_id_dict[fm]
 
     def parse_ext_l(self, fm):
         ext_dict = {
-            'flv'   : 'flv',
-            'mp4'   : 'mp4',
-            'hd2'   : 'flv',
-            'hd3'   : 'flv',
-            '3gp'   : 'flv',
-            '3gphd' : 'mp4'
+            'flv': 'flv',
+            'mp4': 'mp4',
+            'hd2': 'flv',
+            'hd3': 'flv',
+            '3gp': 'flv',
+            '3gphd': 'mp4'
         }
         return ext_dict[fm]
 
     def get_format_name(self, fm):
         _dict = {
-            '3gp'   : 'h6',
-            '3gphd' : 'h5',
-            'flv'   : 'h4',
-            'mp4'   : 'h3',
-            'hd2'   : 'h2',
-            'hd3'   : 'h1'
+            '3gp': 'h6',
+            '3gphd': 'h5',
+            'flv': 'h4',
+            'mp4': 'h3',
+            'hd2': 'h2',
+            'hd3': 'h1'
         }
         return _dict[fm]
 
@@ -194,10 +195,9 @@ def _real_extract(self, url):
         # construct info
         entries = []
         for fm in data1['streamtypes']:
-            #formats = []
             video_urls = video_urls_dict[fm]
             for i in range(len(video_urls)):
-                if len(entries) < i+1:
+                if len(entries) < i + 1:
                     entries.append({'formats': []})
                 entries[i]['formats'].append(
                     {
@@ -211,7 +211,7 @@ def _real_extract(self, url):
         for i in range(len(entries)):
             entries[i].update(
                 {
-                    'id': '_part%d' % (i+1),
+                    'id': '_part%d' % (i + 1),
                     'title': title,
                 }
             )

From c203be3fb4f00388c81564dc0c85ff8a10ff4553 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 15 Jun 2015 23:28:59 +0800
Subject: [PATCH 06/13] [youku] Better handling for Python 2/3 compatibility

---
 youtube_dl/extractor/youku.py | 37 ++++++++++++-----------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 4e47fca8a..26e5baadc 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -7,9 +7,10 @@
 from .common import InfoExtractor
 from ..utils import ExtractorError
 
-from ..compat import compat_urllib_parse
-
-bytes_is_str = (bytes == str)  # for compatible
+from ..compat import (
+    compat_urllib_parse,
+    compat_ord,
+)
 
 
 class YoukuIE(InfoExtractor):
@@ -37,26 +38,20 @@ def yk_t(s1, s2):
             ls = list(range(256))
             t = 0
             for i in range(256):
-                t = (t + ls[i] + ord(s1[i % len(s1)])) % 256
+                t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256
                 ls[i], ls[t] = ls[t], ls[i]
-            s = '' if not bytes_is_str else b''
+            s = bytearray()
             x, y = 0, 0
             for i in range(len(s2)):
                 y = (y + 1) % 256
                 x = (x + ls[y]) % 256
                 ls[x], ls[y] = ls[y], ls[x]
-                if isinstance(s2[i], int):
-                    s += chr(s2[i] ^ ls[(ls[x] + ls[y]) % 256])
-                else:
-                    s += chr(ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
-            return s
+                s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256])
+            return bytes(s)
 
         sid, token = yk_t(
-            'becaf9be',
-            base64.b64decode(bytes(data2['ep'], 'ascii'))
-            if not bytes_is_str
-            else base64.b64decode(data2['ep'])
-        ).split('_')
+            b'becaf9be', base64.b64decode(data2['ep'].encode('ascii'))
+        ).decode('ascii').split('_')
 
         # get oip
         oip = data2['ip']
@@ -89,16 +84,10 @@ def get_fileid(format, n):
         def generate_ep(format, n):
             fileid = get_fileid(format, n)
             ep_t = yk_t(
-                'bf7e5f01',
-                bytes('%s_%s_%s' % (sid, fileid, token), 'ascii')
-                if not bytes_is_str
-                else ('%s_%s_%s' % (sid, fileid, token))
+                b'bf7e5f01',
+                ('%s_%s_%s' % (sid, fileid, token)).encode('ascii')
             )
-            ep = base64.b64encode(
-                bytes(ep_t, 'latin')
-                if not bytes_is_str
-                else ep_t
-            ).decode()
+            ep = base64.b64encode(ep_t).decode('ascii')
             return ep
 
         # generate video_urls

From 9383e66f9475eca0e64c09972c1392d92d17570c Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 15 Jun 2015 23:31:30 +0800
Subject: [PATCH 07/13] [youku] Use _match_id

---
 youtube_dl/extractor/youku.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 26e5baadc..e41b48369 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
 import base64
 
 from .common import InfoExtractor
@@ -154,8 +153,7 @@ def get_format_name(self, fm):
         return _dict[fm]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         # request basic data
         data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id

From ee69799262e8344742b9d8b492fe792b4d586f6a Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 15 Jun 2015 23:36:28 +0800
Subject: [PATCH 08/13] [youku] Add a v.swf test case

---
 youtube_dl/extractor/youku.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index e41b48369..d8162a0c5 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -21,7 +21,7 @@ class YoukuIE(InfoExtractor):
         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
     '''
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
         'md5': '5f3af4192eabacc4501508d54a8cabd7',
         'info_dict': {
@@ -29,7 +29,10 @@ class YoukuIE(InfoExtractor):
             'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
             'ext': 'flv'
         }
-    }
+    }, {
+        'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
+        'only_matching': True,
+    }]
 
     def construct_video_urls(self, data1, data2):
         # get sid, token

From f1e66cb2eb40b48c6508acbe57207a2d99792bf0 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 15 Jun 2015 23:46:07 +0800
Subject: [PATCH 09/13] [youku] Change video_id and add a multipart test case

---
 youtube_dl/extractor/youku.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index d8162a0c5..d5b73ebce 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -25,13 +25,20 @@ class YoukuIE(InfoExtractor):
         'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
         'md5': '5f3af4192eabacc4501508d54a8cabd7',
         'info_dict': {
-            'id': 'XMTc1ODE5Njcy',
+            'id': 'XMTc1ODE5Njcy_part1',
             'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
             'ext': 'flv'
         }
     }, {
         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
         'only_matching': True,
+    }, {
+        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
+        'info_dict': {
+            'id': 'XODgxNjg1Mzk2',
+            'title': '武媚娘传奇 85',
+        },
+        'playlist_count': 11,
     }]
 
     def construct_video_urls(self, data1, data2):
@@ -201,20 +208,14 @@ def _real_extract(self, url):
         for i in range(len(entries)):
             entries[i].update(
                 {
-                    'id': '_part%d' % (i + 1),
+                    'id': '%s_part%d' % (video_id, i + 1),
                     'title': title,
                 }
             )
 
-        if len(entries) > 1:
-            info = {
-                '_type': 'multi_video',
-                'id': video_id,
-                'title': title,
-                'entries': entries,
-            }
-        else:
-            info = entries[0]
-            info['id'] = video_id
-
-        return info
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'title': title,
+            'entries': entries,
+        }

From 04e7596680bce28beae2436bac0f6d1f01a45210 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Mon, 15 Jun 2015 23:54:55 +0800
Subject: [PATCH 10/13] [youku] Better error handling

---
 youtube_dl/extractor/youku.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index d5b73ebce..91f9f6bff 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -176,13 +176,15 @@ def _real_extract(self, url):
 
         error_code = data1.get('error_code')
         if error_code:
-            # -8 means blocked outside China.
-            # Chinese and English, separated by newline.
             error = data1.get('error')
-            raise ExtractorError(
-                error or 'Server reported error %i' %
-                error_code,
-                expected=True)
+            if error is not None and '因版权原因无法观看此视频' in error:
+                raise ExtractorError(
+                    'Youku said: Sorry, this video is available in China only', expected=True)
+            else:
+                msg = 'Youku server reported error %i' % error_code
+                if error is not None:
+                    msg += ': ' + error
+                raise ExtractorError(msg)
 
         title = data1['title']
 

From 5228b756af2c2bfc2962a5b1bb6db1e6a41c9e05 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Tue, 16 Jun 2015 00:06:23 +0800
Subject: [PATCH 11/13] [youku] Add cn_verification_proxy support and add a
 georestricted test case

---
 youtube_dl/extractor/youku.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 91f9f6bff..ea37dc8b2 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -9,6 +9,7 @@
 from ..compat import (
     compat_urllib_parse,
     compat_ord,
+    compat_urllib_request,
 )
 
 
@@ -39,6 +40,14 @@ class YoukuIE(InfoExtractor):
             'title': '武媚娘传奇 85',
         },
         'playlist_count': 11,
+    }, {
+        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
+        'info_dict': {
+            'id': 'XMTI1OTczNDM5Mg',
+            'title': '花千骨 04',
+        },
+        'playlist_count': 13,
+        'skip': 'Available in China only',
     }]
 
     def construct_video_urls(self, data1, data2):
@@ -165,14 +174,23 @@ def get_format_name(self, fm):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        # request basic data
-        data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id
-        data2_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id
+        def retrieve_data(req_url, note):
+            req = compat_urllib_request.Request(req_url)
 
-        raw_data1 = self._download_json(data1_url, video_id)
-        raw_data2 = self._download_json(data2_url, video_id)
-        data1 = raw_data1['data'][0]
-        data2 = raw_data2['data'][0]
+            cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+            if cn_verification_proxy:
+                req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
+            raw_data = self._download_json(req, video_id, note=note)
+            return raw_data['data'][0]
+
+        # request basic data
+        data1 = retrieve_data(
+            'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id,
+            'Downloading JSON metadata 1')
+        data2 = retrieve_data(
+            'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id,
+            'Downloading JSON metadata 2')
 
         error_code = data1.get('error_code')
         if error_code:

From a155b7e76c5a71c650f62c4716d23a24943fc373 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Tue, 16 Jun 2015 00:15:09 +0800
Subject: [PATCH 12/13] [youku] Coding style

---
 youtube_dl/extractor/youku.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index ea37dc8b2..cab5be3a4 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -216,22 +216,18 @@ def retrieve_data(req_url, note):
             for i in range(len(video_urls)):
                 if len(entries) < i + 1:
                     entries.append({'formats': []})
-                entries[i]['formats'].append(
-                    {
-                        'url': video_urls[i],
-                        'format_id': self.get_format_name(fm),
-                        'ext': self.parse_ext_l(fm),
-                        'filesize': int(data1['segs'][fm][i]['size'])
-                    }
-                )
+                entries[i]['formats'].append({
+                    'url': video_urls[i],
+                    'format_id': self.get_format_name(fm),
+                    'ext': self.parse_ext_l(fm),
+                    'filesize': int(data1['segs'][fm][i]['size'])
+                })
 
         for i in range(len(entries)):
-            entries[i].update(
-                {
-                    'id': '%s_part%d' % (video_id, i + 1),
-                    'title': title,
-                }
-            )
+            entries[i].update({
+                'id': '%s_part%d' % (video_id, i + 1),
+                'title': title,
+            })
 
         return {
             '_type': 'multi_video',

From 0501bfa159db5b5e8ed7fd1ed966b9989becb3e9 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Tue, 16 Jun 2015 00:15:30 +0800
Subject: [PATCH 13/13] [YoutubeDL] Youku extractor now uses the standard
 format selection

---
 youtube_dl/YoutubeDL.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index aacec2958..6e4b6f566 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1033,12 +1033,6 @@ def process_video_result(self, info_dict, download=True):
             info_dict['id'], info_dict.get('subtitles'),
             info_dict.get('automatic_captions'))
 
-        # This extractors handle format selection themselves
-        if info_dict['extractor'] in ['Youku']:
-            if download:
-                self.process_info(info_dict)
-            return info_dict
-
         # We now pick which formats have to be downloaded
         if info_dict.get('formats') is None:
             # There's only one format available