From 587021cd9f717181b44e881941aca3f8d753758b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 18 Aug 2022 21:34:47 +0530 Subject: [PATCH] [phantomjs] Add function to execute JS without a DOM Authored by: MinePlayersPE, pukkandan --- yt_dlp/extractor/openload.py | 62 ++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index f12a0eff1..e66ed4831 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -1,3 +1,4 @@ +import collections import contextlib import json import os @@ -9,8 +10,10 @@ ExtractorError, Popen, check_executable, + format_field, get_exe_version, is_outdated_version, + shell_quote, ) @@ -49,7 +52,7 @@ class PhantomJSwrapper: This class is experimental. """ - _TEMPLATE = r''' + _BASE_JS = R''' phantom.onError = function(msg, trace) {{ var msgStack = ['PHANTOM ERROR: ' + msg]; if(trace && trace.length) {{ @@ -62,6 +65,9 @@ class PhantomJSwrapper: console.error(msgStack.join('\n')); phantom.exit(1); }}; + ''' + + _TEMPLATE = R''' var page = require('webpage').create(); var fs = require('fs'); var read = {{ mode: 'r', charset: 'utf-8' }}; @@ -116,14 +122,18 @@ def __init__(self, extractor, required_version=None, timeout=10000): 'Your copy of PhantomJS is outdated, update it to version ' '%s or newer if you encounter any errors.' % required_version) - self.options = { - 'timeout': timeout, - } for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() self._TMP_FILES[name] = tmp + self.options = collections.ChainMap({ + 'timeout': timeout, + }, { + x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + for x in self._TMP_FILE_NAMES + }) + def __del__(self): for name in self._TMP_FILE_NAMES: with contextlib.suppress(OSError, KeyError): @@ -194,31 +204,35 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w self._save_cookies(url) - replaces = self.options - replaces['url'] = url user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent'] - replaces['ua'] = user_agent.replace('"', '\\"') - replaces['jscode'] = jscode + jscode = self._TEMPLATE.format_map(self.options.new_child({ + 'url': url, + 'ua': user_agent.replace('"', '\\"'), + 'jscode': jscode, + })) - for x in self._TMP_FILE_NAMES: - replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + stdout = self.execute(jscode, video_id, note2) - with open(self._TMP_FILES['script'].name, 'wb') as f: - f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) - - if video_id is None: - self.extractor.to_screen(f'{note2}') - else: - self.extractor.to_screen(f'{video_id}: {note2}') - - stdout, stderr, returncode = Popen.run( - [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], - text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if returncode: - raise ExtractorError(f'Executing JS failed:\n{stderr}') with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') - self._load_cookies() return html, stdout + + def execute(self, jscode, video_id=None, note='Executing JS'): + """Execute JS and return stdout""" + if 'phantom.exit();' not in jscode: + jscode += ';\nphantom.exit();' + jscode = self._BASE_JS + jscode + + with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f: + f.write(jscode) + self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') + + cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name] + self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}') + stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if returncode: + raise ExtractorError(f'Executing JS failed:\n{stderr.strip()}') + + return stdout