mirror of https://github.com/CIRCL/lookyloo
new: Upload optional cookie file
parent
8e5d254caa
commit
f1309ce5e3
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from .exceptions import MissingEnv, CreateDirectoryException
|
||||
from redis import Redis
|
||||
|
@ -143,22 +144,28 @@ def get_user_agents() -> dict:
|
|||
with open(paths[0]) as f:
|
||||
return json.load(f)
|
||||
|
||||
def load_cookies(cookie_pseudofile: Optional[BytesIO]=None) -> List[dict]:
    """Convert a Cookie Quick Manager JSON export into a list of cookie dicts.

    The cookies are read from ``cookie_pseudofile`` when one is given,
    otherwise from ``cookies.json`` in the directory returned by
    ``get_homedir()``.

    Each entry of the export must provide the keys ``Host raw``, ``Path raw``,
    ``Name raw``, ``HTTP only raw``, ``Send for`` and ``Content raw``.

    :param cookie_pseudofile: in-memory file holding the JSON export, or None
        to fall back on the on-disk ``cookies.json``.
    :return: one dict per cookie with the keys ``path``, ``name``,
        ``httpOnly``, ``secure``, ``expires``, ``domain`` and ``value``.
        An empty list when there is no cookie file or it cannot be parsed.
        When an entry is malformed, the error is printed and only the entries
        converted before it are returned.
    """
    # Function-scope import: only `datetime` and `timedelta` are used by the
    # original code, so `timezone` may not be imported at module level.
    from datetime import timezone

    if cookie_pseudofile:
        try:
            cookies = json.load(cookie_pseudofile)
        except ValueError as e:
            # JSONDecodeError and UnicodeDecodeError are both ValueError.
            # The pseudofile is user-supplied: report it, do not crash.
            print(f'Unable to load the cookie file: {e}')
            return []
    else:
        cookie_path = get_homedir() / 'cookies.json'
        if not cookie_path.exists():
            return []
        try:
            with cookie_path.open() as f:
                cookies = json.load(f)
        except (OSError, ValueError) as e:
            print(f'Unable to load the cookie file: {e}')
            return []

    # Every cookie gets the same expiry: 10 days from now. The trailing 'Z'
    # denotes UTC, so the timestamp has to be computed in UTC as well.
    expires = (datetime.now(timezone.utc) + timedelta(days=10)).strftime('%Y-%m-%dT%H:%M:%S') + 'Z'

    to_return = []
    try:
        for cookie in cookies:
            # Keep the hostname only: drop scheme, path and port.
            u = urlparse(cookie['Host raw']).netloc.split(':', 1)[0]
            to_add = {'path': cookie['Path raw'],
                      'name': cookie['Name raw'],
                      'httpOnly': cookie['HTTP only raw'] == 'true',
                      'secure': cookie['Send for'] == 'Encrypted connections only',
                      'expires': expires,
                      'domain': u,
                      'value': cookie['Content raw']
                      }
            to_return.append(to_add)
    except (KeyError, TypeError, ValueError, AttributeError) as e:
        # The JSON is valid but does not have the expected structure.
        print(f'Unable to load the cookie file: {e}')
    return to_return
|
||||
|
|
|
@ -162,7 +162,7 @@ class Lookyloo():
|
|||
return self.sanejs.sha512(sha512)
|
||||
return {'response': []}
|
||||
|
||||
def scrape(self, url: str, cookies: List[dict]=[], depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None,
|
||||
def scrape(self, url: str, cookies_pseudofile: Optional[BytesIO]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None, perma_uuid: str=None,
|
||||
os: str=None, browser: str=None) -> Union[bool, str]:
|
||||
if not url.startswith('http'):
|
||||
url = f'http://{url}'
|
||||
|
@ -175,7 +175,8 @@ class Lookyloo():
|
|||
return False
|
||||
else:
|
||||
return False
|
||||
cookies = load_cookies()
|
||||
|
||||
cookies = load_cookies(cookies_pseudofile)
|
||||
items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
|
||||
if not items:
|
||||
# broken
|
||||
|
|
|
@ -64,7 +64,14 @@ def submit():
|
|||
@app.route('/scrape', methods=['GET', 'POST'])
|
||||
def scrape_web():
|
||||
if request.form.get('url'):
|
||||
perma_uuid = lookyloo.scrape(url=request.form.get('url'), depth=request.form.get('depth'),
|
||||
# check if the post request has the file part
|
||||
if 'file' in request.files and request.files['file'].filename:
|
||||
cookie_file = BytesIO(request.files['file'].stream.read())
|
||||
else:
|
||||
cookie_file = None
|
||||
perma_uuid = lookyloo.scrape(url=request.form.get('url'),
|
||||
cookies_pseudofile=cookie_file,
|
||||
depth=request.form.get('depth'),
|
||||
listing=request.form.get('listing'), user_agent=request.form.get('user_agent'),
|
||||
os=request.form.get('os'), browser=request.form.get('browser'))
|
||||
return redirect(url_for('tree', tree_uuid=perma_uuid))
|
||||
|
|
|
@ -41,6 +41,13 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group row">
|
||||
<label for="cookies" class="col-sm-6">Cookies (JSON export from the Firefox plugin Cookie Quick Manager)</label>
|
||||
<div class="col-sm-4">
|
||||
<!-- The name must match the key the /scrape handler reads from request.files ('file'); a control without a name is not submitted. NOTE(review): the enclosing form must also use enctype="multipart/form-data" - confirm. -->
<input type="file" class="form-control-file" id="cookies" name="file">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="dropdown-divider"></div>
|
||||
<p><u>Browser Configuration</u></p>
|
||||
<div class="form-group row">
|
||||
|
|
Loading…
Reference in New Issue