[phantomjs] add cookie support
parent
da57ebaf84
commit
40e41780f1
|
@ -2343,10 +2343,12 @@ class InfoExtractor(object):
|
||||||
self._downloader.report_warning(msg)
|
self._downloader.report_warning(msg)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
                path='/', secure=False, discard=False, rest=None, **kwargs):
    """Build a cookie and add it to the downloader's cookie jar.

    domain      -- cookie domain; a leading '.' marks it as an
                   initial-dot domain
    name, value -- cookie name and value
    expire_time -- expiry passed through to the Cookie (None for no expiry)
    port        -- optional port; the cookie is flagged as port-specified
                   only when this is not None
    path        -- cookie path (always flagged as specified)
    secure      -- value for the Cookie's secure flag
    discard     -- value for the Cookie's discard flag
    rest        -- optional dict of non-standard attributes
                   (e.g. {'httpOnly': None}); defaults to an empty dict
    **kwargs    -- accepted and ignored, so callers may pass a whole
                   cookie dictionary via ** without filtering its keys
    """
    # A None default avoids sharing one mutable dict between calls.
    if rest is None:
        rest = {}
    cookie = compat_cookiejar.Cookie(
        0, name, value, port, port is not None, domain, True,
        domain.startswith('.'), path, True, secure, expire_time,
        discard, None, None, rest)
    self._downloader.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
|
|
|
@ -3654,6 +3654,37 @@ def write_xattr(path, key, value):
|
||||||
"or the 'xattr' binary.")
|
"or the 'xattr' binary.")
|
||||||
|
|
||||||
|
|
||||||
|
def cookie_to_dict(cookie):
    """Convert a cookiejar Cookie object into a plain dictionary.

    'name' and 'value' are always present.  'port', 'domain' and 'path'
    are copied only when the cookie flags them as specified; 'expires',
    'secure' and 'discard' are copied only when they are not None.
    'httponly' is set to True when the cookie carries a non-standard
    attribute spelled 'httpOnly', 'httponly' or 'HttpOnly'.
    """
    cookie_dict = {
        'name': cookie.name,
        'value': cookie.value,
    }
    if cookie.port_specified:
        cookie_dict['port'] = cookie.port
    if cookie.domain_specified:
        cookie_dict['domain'] = cookie.domain
    if cookie.path_specified:
        cookie_dict['path'] = cookie.path
    if cookie.expires is not None:
        cookie_dict['expires'] = cookie.expires
    if cookie.secure is not None:
        cookie_dict['secure'] = cookie.secure
    if cookie.discard is not None:
        cookie_dict['discard'] = cookie.discard
    try:
        if any(cookie.has_nonstandard_attr(attr)
               for attr in ('httpOnly', 'httponly', 'HttpOnly')):
            cookie_dict['httponly'] = True
    except TypeError:
        # Best effort: the lookup can raise TypeError (presumably when the
        # cookie was built without a non-standard attribute mapping); such
        # a cookie is simply treated as not HTTP-only.
        pass
    return cookie_dict
|
||||||
|
|
||||||
|
|
||||||
|
def cookie_jar_to_list(cookie_jar):
    """Return a list with one cookie_to_dict() result per cookie in the jar."""
    return list(map(cookie_to_dict, cookie_jar))
|
||||||
|
|
||||||
|
|
||||||
class PhantomJSwrapper(object):
|
class PhantomJSwrapper(object):
|
||||||
"""PhantomJS wrapper class"""
|
"""PhantomJS wrapper class"""
|
||||||
|
|
||||||
|
@ -3674,6 +3705,9 @@ class PhantomJSwrapper(object):
|
||||||
var fs = require('fs');
|
var fs = require('fs');
|
||||||
var read = {{ mode: 'r', charset: 'utf-8' }};
|
var read = {{ mode: 'r', charset: 'utf-8' }};
|
||||||
var write = {{ mode: 'w', charset: 'utf-8' }};
|
var write = {{ mode: 'w', charset: 'utf-8' }};
|
||||||
|
JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
|
||||||
|
phantom.addCookie(x);
|
||||||
|
}});
|
||||||
page.settings.resourceTimeout = {timeout};
|
page.settings.resourceTimeout = {timeout};
|
||||||
page.settings.userAgent = "{ua}";
|
page.settings.userAgent = "{ua}";
|
||||||
page.onLoadStarted = function() {{
|
page.onLoadStarted = function() {{
|
||||||
|
@ -3684,6 +3718,7 @@ class PhantomJSwrapper(object):
|
||||||
}};
|
}};
|
||||||
var saveAndExit = function() {{
|
var saveAndExit = function() {{
|
||||||
fs.write("{html}", page.content, write);
|
fs.write("{html}", page.content, write);
|
||||||
|
fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
|
||||||
phantom.exit();
|
phantom.exit();
|
||||||
}};
|
}};
|
||||||
page.onLoadFinished = function(status) {{
|
page.onLoadFinished = function(status) {{
|
||||||
|
@ -3697,7 +3732,7 @@ class PhantomJSwrapper(object):
|
||||||
page.open("");
|
page.open("");
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TMP_FILE_NAMES = ['script', 'html']
|
_TMP_FILE_NAMES = ['script', 'html', 'cookies']
|
||||||
|
|
||||||
def __init__(self, extractor, timeout=10000):
|
def __init__(self, extractor, timeout=10000):
|
||||||
self.exe = check_executable('phantomjs', ['-v'])
|
self.exe = check_executable('phantomjs', ['-v'])
|
||||||
|
@ -3722,6 +3757,26 @@ class PhantomJSwrapper(object):
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _save_cookies(self, url):
    """Dump the downloader's cookie jar as JSON into the 'cookies' temp file.

    Entries without a 'path' get '/', and entries without a 'domain' get
    the network location parsed from url, before the list is serialized
    as UTF-8 encoded JSON.
    """
    jar_entries = cookie_jar_to_list(self.extractor._downloader.cookiejar)
    for entry in jar_entries:
        entry.setdefault('path', '/')
        if 'domain' in entry:
            continue
        # Parsed lazily: only entries lacking a domain need the URL's netloc.
        entry['domain'] = compat_urlparse.urlparse(url).netloc
    with open(self._TMP_FILES['cookies'].name, 'wb') as out:
        out.write(json.dumps(jar_entries).encode('utf-8'))
|
||||||
|
|
||||||
|
def _load_cookies(self):
    """Read the 'cookies' temp file and feed every entry to the extractor.

    The file is expected to hold a UTF-8 encoded JSON list of cookie
    dictionaries.  Each one is passed to self.extractor._set_cookie() as
    keyword arguments after two translations:

    * 'httponly' being True adds rest={'httpOnly': None};
    * an 'expiry' key is copied to 'expire_time'.

    A missing 'httponly' key is treated as "not HTTP-only" instead of
    raising KeyError.
    """
    with open(self._TMP_FILES['cookies'].name, 'rb') as f:
        cookies = json.loads(f.read().decode('utf-8'))
    for cookie in cookies:
        # Work on a copy so the parsed entries are not modified in place.
        params = dict(cookie)
        if params.get('httponly') is True:
            params['rest'] = {'httpOnly': None}
        if 'expiry' in params:
            params['expire_time'] = params['expiry']
        self.extractor._set_cookie(**params)
|
||||||
|
|
||||||
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
|
def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
|
||||||
"""
|
"""
|
||||||
Downloads webpage (if needed) and executes JS
|
Downloads webpage (if needed) and executes JS
|
||||||
|
@ -3765,6 +3820,8 @@ class PhantomJSwrapper(object):
|
||||||
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
with open(self._TMP_FILES['html'].name, 'wb') as f:
|
||||||
f.write(html.encode('utf-8'))
|
f.write(html.encode('utf-8'))
|
||||||
|
|
||||||
|
self._save_cookies(url)
|
||||||
|
|
||||||
replaces = self.options
|
replaces = self.options
|
||||||
replaces['url'] = url
|
replaces['url'] = url
|
||||||
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
|
user_agent = headers.get('User-Agent') or std_headers['User-Agent']
|
||||||
|
@ -3791,5 +3848,8 @@ class PhantomJSwrapper(object):
|
||||||
+ encodeArgument(err))
|
+ encodeArgument(err))
|
||||||
with open(self._TMP_FILES['html'].name, 'rb') as f:
|
with open(self._TMP_FILES['html'].name, 'rb') as f:
|
||||||
html = f.read().decode('utf-8')
|
html = f.read().decode('utf-8')
|
||||||
|
|
||||||
|
self._load_cookies()
|
||||||
|
|
||||||
return (html, encodeArgument(out))
|
return (html, encodeArgument(out))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue