new: Upload optional cookie file

pull/67/head
Raphaël Vinot 2020-01-24 10:17:41 +01:00
parent 8e5d254caa
commit f1309ce5e3
4 changed files with 42 additions and 20 deletions

View File

@ -1,7 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os import os
from typing import List from typing import List, Optional
from io import BytesIO
from pathlib import Path from pathlib import Path
from .exceptions import MissingEnv, CreateDirectoryException from .exceptions import MissingEnv, CreateDirectoryException
from redis import Redis from redis import Redis
@ -143,22 +144,28 @@ def get_user_agents() -> dict:
with open(paths[0]) as f: with open(paths[0]) as f:
return json.load(f) return json.load(f)
def load_cookies(cookie_pseudofile: Optional[BytesIO]=None) -> List[dict]:
    """Load a cookie export and convert it to the list of dicts passed to the crawler.

    The expected input is a JSON list of objects using the keys 'Host raw',
    'Path raw', 'Name raw', 'HTTP only raw', 'Send for' and 'Content raw'.

    :param cookie_pseudofile: in-memory JSON document (e.g. an uploaded file).
        When None, `cookies.json` in the home directory is used instead, if it exists.
    :return: one dict per cookie with the keys path, name, httpOnly, secure,
        expires, domain and value. An empty list is returned when there is no
        cookie file or when it cannot be parsed. If an entry is malformed, the
        conversion stops there and the cookies converted so far are returned.
    """
    try:
        if cookie_pseudofile is not None:
            cookies = json.load(cookie_pseudofile)
        else:
            cookie_file = get_homedir() / 'cookies.json'
            if not cookie_file.exists():
                return []
            with cookie_file.open() as f:
                cookies = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError: an
        # invalid (possibly user-uploaded) file must not abort the capture.
        print(f'Unable to load the cookie file: {e}')
        return []

    # Every cookie gets the same arbitrary 10 days lifetime.
    # NOTE(review): datetime.now() is local time but the string is suffixed with
    # 'Z' (UTC); the offset is negligible compared to the 10 days -- confirm.
    expires = (datetime.now() + timedelta(days=10)).strftime('%Y-%m-%dT%H:%M:%S') + 'Z'

    to_return = []
    try:
        for cookie in cookies:
            # 'Host raw' is a URL; only keep the hostname, without the port.
            u = urlparse(cookie['Host raw']).netloc.split(':', 1)[0]
            to_add = {'path': cookie['Path raw'],
                      'name': cookie['Name raw'],
                      'httpOnly': cookie['HTTP only raw'] == 'true',
                      'secure': cookie['Send for'] == 'Encrypted connections only',
                      'expires': expires,
                      'domain': u,
                      'value': cookie['Content raw']
                      }
            to_return.append(to_add)
    except (KeyError, TypeError, AttributeError, ValueError) as e:
        # Valid JSON, but not the expected structure (not a list of cookie objects,
        # missing key, ...).
        print(f'Unable to load the cookie file: {e}')
    return to_return

View File

@ -162,7 +162,7 @@ class Lookyloo():
return self.sanejs.sha512(sha512) return self.sanejs.sha512(sha512)
return {'response': []} return {'response': []}
def scrape(self, url: str, cookies: List[dict]=[], depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None, def scrape(self, url: str, cookies_pseudofile: Optional[BytesIO]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None,
os: str=None, browser: str=None) -> Union[bool, str]: os: str=None, browser: str=None) -> Union[bool, str]:
if not url.startswith('http'): if not url.startswith('http'):
url = f'http://{url}' url = f'http://{url}'
@ -175,7 +175,8 @@ class Lookyloo():
return False return False
else: else:
return False return False
cookies = load_cookies()
cookies = load_cookies(cookies_pseudofile)
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, log_enabled=True, log_level='INFO') items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
if not items: if not items:
# broken # broken

View File

@ -64,7 +64,14 @@ def submit():
@app.route('/scrape', methods=['GET', 'POST']) @app.route('/scrape', methods=['GET', 'POST'])
def scrape_web(): def scrape_web():
if request.form.get('url'): if request.form.get('url'):
perma_uuid = lookyloo.scrape(url=request.form.get('url'), depth=request.form.get('depth'), # check if the post request has the file part
if 'file' in request.files and request.files['file'].filename:
cookie_file = BytesIO(request.files['file'].stream.read())
else:
cookie_file = None
perma_uuid = lookyloo.scrape(url=request.form.get('url'),
cookies_pseudofile=cookie_file,
depth=request.form.get('depth'),
listing=request.form.get('listing'), user_agent=request.form.get('user_agent'), listing=request.form.get('listing'), user_agent=request.form.get('user_agent'),
os=request.form.get('os'), browser=request.form.get('browser')) os=request.form.get('os'), browser=request.form.get('browser'))
return redirect(url_for('tree', tree_uuid=perma_uuid)) return redirect(url_for('tree', tree_uuid=perma_uuid))

View File

@ -41,6 +41,13 @@
</div> </div>
</div> </div>
</div> </div>
<!-- Optional cookie file upload. The input needs a name to be part of the
     submitted form data; "file" is the key the /scrape handler looks up in
     request.files. NOTE(review): the enclosing form must also be sent as
     multipart/form-data for the file content to be transmitted; confirm. -->
<div class="form-group row">
  <label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label>
  <div class="col-sm-4">
    <input type="file" class="form-control-file" id="cookies" name="file">
  </div>
</div>
<div class="dropdown-divider"></div> <div class="dropdown-divider"></div>
<p><u>Browser Configuration</u></p> <p><u>Browser Configuration</u></p>
<div class="form-group row"> <div class="form-group row">