mirror of https://github.com/CIRCL/lookyloo
new: Upload optional cookie file
parent
8e5d254caa
commit
f1309ce5e3
|
@ -1,7 +1,8 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import os
|
import os
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from .exceptions import MissingEnv, CreateDirectoryException
|
from .exceptions import MissingEnv, CreateDirectoryException
|
||||||
from redis import Redis
|
from redis import Redis
|
||||||
|
@ -143,13 +144,17 @@ def get_user_agents() -> dict:
|
||||||
with open(paths[0]) as f:
|
with open(paths[0]) as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
def load_cookies() -> List[dict]:
|
def load_cookies(cookie_pseudofile: Optional[BytesIO]=None) -> List[dict]:
|
||||||
|
if cookie_pseudofile:
|
||||||
|
cookies = json.load(cookie_pseudofile)
|
||||||
|
else:
|
||||||
if not (get_homedir() / 'cookies.json').exists():
|
if not (get_homedir() / 'cookies.json').exists():
|
||||||
return []
|
return []
|
||||||
|
|
||||||
with (get_homedir() / 'cookies.json').open() as f:
|
with (get_homedir() / 'cookies.json').open() as f:
|
||||||
cookies = json.load(f)
|
cookies = json.load(f)
|
||||||
to_return = []
|
to_return = []
|
||||||
|
try:
|
||||||
for cookie in cookies:
|
for cookie in cookies:
|
||||||
u = urlparse(cookie['Host raw']).netloc.split(':', 1)[0]
|
u = urlparse(cookie['Host raw']).netloc.split(':', 1)[0]
|
||||||
to_add = {'path': cookie['Path raw'],
|
to_add = {'path': cookie['Path raw'],
|
||||||
|
@ -161,4 +166,6 @@ def load_cookies() -> List[dict]:
|
||||||
'value': cookie['Content raw']
|
'value': cookie['Content raw']
|
||||||
}
|
}
|
||||||
to_return.append(to_add)
|
to_return.append(to_add)
|
||||||
|
except Exception as e:
|
||||||
|
print(f'Unable to load the cookie file: {e}')
|
||||||
return to_return
|
return to_return
|
||||||
|
|
|
@ -162,7 +162,7 @@ class Lookyloo():
|
||||||
return self.sanejs.sha512(sha512)
|
return self.sanejs.sha512(sha512)
|
||||||
return {'response': []}
|
return {'response': []}
|
||||||
|
|
||||||
def scrape(self, url: str, cookies: List[dict]=[], depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None,
|
def scrape(self, url: str, cookies_pseudofile: Optional[BytesIO]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None,
|
||||||
os: str=None, browser: str=None) -> Union[bool, str]:
|
os: str=None, browser: str=None) -> Union[bool, str]:
|
||||||
if not url.startswith('http'):
|
if not url.startswith('http'):
|
||||||
url = f'http://{url}'
|
url = f'http://{url}'
|
||||||
|
@ -175,7 +175,8 @@ class Lookyloo():
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
cookies = load_cookies()
|
|
||||||
|
cookies = load_cookies(cookies_pseudofile)
|
||||||
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
|
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
|
||||||
if not items:
|
if not items:
|
||||||
# broken
|
# broken
|
||||||
|
|
|
@ -64,7 +64,14 @@ def submit():
|
||||||
@app.route('/scrape', methods=['GET', 'POST'])
|
@app.route('/scrape', methods=['GET', 'POST'])
|
||||||
def scrape_web():
|
def scrape_web():
|
||||||
if request.form.get('url'):
|
if request.form.get('url'):
|
||||||
perma_uuid = lookyloo.scrape(url=request.form.get('url'), depth=request.form.get('depth'),
|
# check if the post request has the file part
|
||||||
|
if 'file' in request.files and request.files['file'].filename:
|
||||||
|
cookie_file = BytesIO(request.files['file'].stream.read())
|
||||||
|
else:
|
||||||
|
cookie_file = None
|
||||||
|
perma_uuid = lookyloo.scrape(url=request.form.get('url'),
|
||||||
|
cookies_pseudofile=cookie_file,
|
||||||
|
depth=request.form.get('depth'),
|
||||||
listing=request.form.get('listing'), user_agent=request.form.get('user_agent'),
|
listing=request.form.get('listing'), user_agent=request.form.get('user_agent'),
|
||||||
os=request.form.get('os'), browser=request.form.get('browser'))
|
os=request.form.get('os'), browser=request.form.get('browser'))
|
||||||
return redirect(url_for('tree', tree_uuid=perma_uuid))
|
return redirect(url_for('tree', tree_uuid=perma_uuid))
|
||||||
|
|
|
@ -41,6 +41,13 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="form-group row">
|
||||||
|
<label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label>
|
||||||
|
<div class="col-sm-4">
|
||||||
|
<!-- name="file" is required: inputs without a name are not submitted, and the /scrape handler reads request.files['file']. NOTE(review): the enclosing form must use enctype="multipart/form-data"; it is outside this hunk, please confirm. -->
<input type="file" class="form-control-file" id="cookies" name="file">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="dropdown-divider"></div>
|
<div class="dropdown-divider"></div>
|
||||||
<p><u>Browser Configuration</u></p>
|
<p><u>Browser Configuration</u></p>
|
||||||
<div class="form-group row">
|
<div class="form-group row">
|
||||||
|
|
Loading…
Reference in New Issue