mirror of https://github.com/CIRCL/lookyloo
new: Upload a file instead of submitting a URL.
parent 2ce8b5a96c
commit 72c4e43474

@@ -4,10 +4,13 @@ import asyncio
 import ipaddress
 import json
 import logging
+import os
 import socket

 from datetime import datetime
 from io import BufferedIOBase
+from pathlib import Path
+from tempfile import NamedTemporaryFile
 from typing import Dict, List, Optional, Tuple, Union
 from urllib.parse import urlsplit

@@ -37,18 +40,18 @@ class AsyncCapture(AbstractManager):
         if not self.fox.available:
             self.logger.warning('Unable to setup the FOX module')

-    def thirdparty_submit(self, capture_data: Dict[str, str]) -> None:
+    def thirdparty_submit(self, url: str) -> None:
         if self.fox.available:
-            self.fox.capture_default_trigger(capture_data['url'], auto_trigger=True)
+            self.fox.capture_default_trigger(url, auto_trigger=True)

     async def process_capture_queue(self) -> None:
         '''Process a query from the capture queue'''
-        value: List[Tuple[str, float]] = await self.redis.zpopmax('to_capture')
+        value: List[Tuple[bytes, float]] = await self.redis.zpopmax('to_capture')
         if not value or not value[0]:
             # The queue was consumed by an other process.
             return
-        uuid, _score = value[0]
-        queue: Optional[str] = await self.redis.get(f'{uuid}_mgmt')
+        uuid = value[0][0].decode()
+        queue: Optional[bytes] = await self.redis.get(f'{uuid}_mgmt')
         await self.redis.sadd('ongoing', uuid)

         async with self.redis.pipeline() as lazy_cleanup:
@@ -57,55 +60,70 @@ class AsyncCapture(AbstractManager):
                 # queue shouldn't be none, but if it is, just ignore.
                 await lazy_cleanup.zincrby('queues', -1, queue)

-            to_capture: Dict[str, str] = await self.redis.hgetall(uuid)
+            to_capture: Dict[bytes, bytes] = await self.redis.hgetall(uuid)

             if get_config('generic', 'default_public'):
                 # By default, the captures are on the index, unless the user mark them as un-listed
-                listing = False if ('listing' in to_capture and to_capture['listing'].lower() in ['false', '0', '']) else True
+                listing = False if ('listing' in to_capture and to_capture[b'listing'].lower() in [b'false', b'0', b'']) else True
             else:
                 # By default, the captures are not on the index, unless the user mark them as listed
-                listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False
+                listing = True if ('listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False

             # Turn the freetext for the headers into a dict
-            headers = {}
-            if 'headers' in to_capture:
-                for header_line in to_capture['headers'].splitlines():
+            headers: Dict[str, str] = {}
+            if b'headers' in to_capture:
+                for header_line in to_capture[b'headers'].decode().splitlines():
                     if header_line and ':' in header_line:
                         splitted = header_line.split(':', 1)
                         if splitted and len(splitted) == 2:
                             header, h_value = splitted
                             if header and h_value:
                                 headers[header.strip()] = h_value.strip()
-            if to_capture.get('dnt'):
-                headers['DNT'] = to_capture['dnt']
+            if to_capture.get(b'dnt'):
+                headers['DNT'] = to_capture[b'dnt'].decode()

-            self.logger.info(f'Capturing {to_capture["url"]} - {uuid}')
-            self.thirdparty_submit(to_capture)
-            success, error_message = await self._capture(
-                to_capture['url'],
-                perma_uuid=uuid,
-                cookies_pseudofile=to_capture.get('cookies', None),
-                listing=listing,
-                user_agent=to_capture.get('user_agent', None),
-                referer=to_capture.get('referer', None),
-                headers=headers if headers else None,
-                proxy=to_capture.get('proxy', None),
-                os=to_capture.get('os', None),
-                browser=to_capture.get('browser', None),
-                parent=to_capture.get('parent', None)
-            )
-            if success:
-                self.logger.info(f'Successfully captured {to_capture["url"]} - {uuid}')
+            if to_capture.get(b'document'):
+                # we do not have a URL yet.
+                document_name = Path(to_capture[b'document_name'].decode()).name
+                tmp_f = NamedTemporaryFile(suffix=document_name, delete=False)
+                with open(tmp_f.name, "wb") as f:
+                    f.write(to_capture[b'document'])
+                url = f'file://{tmp_f.name}'
             else:
-                self.logger.warning(f'Unable to capture {to_capture["url"]} - {uuid}: {error_message}')
-                await lazy_cleanup.setex(f'error_{uuid}', 36000, f'{error_message} - {to_capture["url"]} - {uuid}')
+                url = to_capture[b'url'].decode()
+                self.thirdparty_submit(url)
+
+            self.logger.info(f'Capturing {url} - {uuid}')
+            success, error_message = await self._capture(
+                url,
+                perma_uuid=uuid,
+                cookies_pseudofile=to_capture.get(b'cookies', None),
+                listing=listing,
+                user_agent=to_capture[b'user_agent'].decode() if to_capture.get(b'user_agent') else None,
+                referer=to_capture[b'referer'].decode() if to_capture.get(b'referer') else None,
+                headers=headers if headers else None,
+                proxy=to_capture[b'proxy'].decode() if to_capture.get(b'proxy') else None,
+                os=to_capture[b'os'].decode() if to_capture.get(b'os') else None,
+                browser=to_capture[b'browser'].decode() if to_capture.get(b'browser') else None,
+                parent=to_capture[b'parent'].decode() if to_capture.get(b'parent') else None
+            )
+
+            if to_capture.get(b'document'):
+                os.unlink(tmp_f.name)
+
+            if success:
+                self.logger.info(f'Successfully captured {url} - {uuid}')
             else:
+                self.logger.warning(f'Unable to capture {url} - {uuid}: {error_message}')
+                await lazy_cleanup.setex(f'error_{uuid}', 36000, f'{error_message} - {url} - {uuid}')
             await lazy_cleanup.srem('ongoing', uuid)
             await lazy_cleanup.delete(uuid)
             # make sure to expire the key if nothing was processed for a while (= queues empty)
             await lazy_cleanup.expire('queues', 600)
             await lazy_cleanup.execute()

-    async def _capture(self, url: str, *, perma_uuid: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
+    async def _capture(self, url: str, *, perma_uuid: str,
+                       cookies_pseudofile: Optional[Union[BufferedIOBase, str, bytes]]=None,
                        listing: bool=True, user_agent: Optional[str]=None,
                        referer: Optional[str]=None,
                        headers: Optional[Dict[str, str]]=None,
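The hunk above is the heart of the new feature on the worker side: when a queued capture carries an uploaded document rather than a URL, the raw bytes are written to a temporary file, the headless browser is pointed at it through a file:// URL, and the file is unlinked once the capture has finished. A condensed sketch of that flow, outside the diff context (the helper name and the bare dict are illustrative, not the actual worker code):

    from pathlib import Path
    from tempfile import NamedTemporaryFile
    from typing import Dict


    def resolve_capture_url(to_capture: Dict[bytes, bytes]) -> str:
        """Return the URL the headless browser should open for a queued capture (sketch)."""
        if to_capture.get(b'document'):
            # Uploaded file: dump the bytes to disk and capture them through file://.
            document_name = Path(to_capture[b'document_name'].decode()).name
            tmp_f = NamedTemporaryFile(suffix=document_name, delete=False)
            with open(tmp_f.name, 'wb') as f:
                f.write(to_capture[b'document'])
            return f'file://{tmp_f.name}'
        # Plain submission: the URL itself is stored as bytes in the Redis hash.
        return to_capture[b'url'].decode()

Passing delete=False keeps the file around until the capture returns, which is why the second "if to_capture.get(b'document'):" block in the diff removes it with os.unlink() afterwards.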
@@ -114,7 +132,7 @@ class AsyncCapture(AbstractManager):
         '''Launch a capture'''
         url = url.strip()
         url = refang(url)
-        if not url.startswith('http'):
+        if not url.startswith('data') and not url.startswith('http') and not url.startswith('file'):
             url = f'http://{url}'
         splitted_url = urlsplit(url)
         if self.only_global_lookups:
@@ -187,11 +205,11 @@ class AsyncCapture(AbstractManager):
                 _parent.write(parent)

         if 'downloaded_filename' in entries and entries['downloaded_filename']:
-            with(dirpath / '0.data.filename').open('w') as _downloaded_filename:
+            with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
                 _downloaded_filename.write(entries['downloaded_filename'])

         if 'downloaded_file' in entries and entries['downloaded_file']:
-            with(dirpath / '0.data').open('wb') as _downloaded_file:
+            with (dirpath / '0.data').open('wb') as _downloaded_file:
                 _downloaded_file.write(entries['downloaded_file'])

         if 'error' in entries:
@@ -223,7 +241,7 @@ class AsyncCapture(AbstractManager):
         return True, 'All good!'

     async def _to_run_forever_async(self):
-        self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
+        self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
         while await self.redis.exists('to_capture'):
             await self.process_capture_queue()
             if self.shutdown_requested():

@@ -129,14 +129,14 @@ def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, Any]]:
     return to_return


-def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str]]=None) -> List[Dict[str, Union[str, bool]]]:
+def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes]]=None) -> List[Dict[str, Union[str, bool]]]:
     cookies: List[Dict[str, Union[str, bool]]]
     if cookie_pseudofile:
-        if isinstance(cookie_pseudofile, str):
+        if isinstance(cookie_pseudofile, (str, bytes)):
             try:
                 cookies = json.loads(cookie_pseudofile)
             except json.decoder.JSONDecodeError:
-                logger.warning(f'Unable to load json content: {cookie_pseudofile}')
+                logger.warning(f'Unable to load json content: {cookie_pseudofile!r}')
                 return []
         else:
             # Note: we might have an empty BytesIO, which is not False.

@@ -5,6 +5,7 @@ import hashlib
 import json
 import logging
 import operator
+import pickle
 import smtplib

 from collections import defaultdict
@@ -398,11 +399,9 @@ class Lookyloo():
                 query[key] = 1 if value else 0
             elif isinstance(value, (list, dict)):
                 query[key] = json.dumps(value)
-            elif isinstance(value, bytes):
-                query[key] = value.decode()

         # dirty deduplicate
-        hash_query = hashlib.sha512(json.dumps(query).encode()).hexdigest()
+        hash_query = hashlib.sha512(pickle.dumps(query)).hexdigest()
         # FIXME The line below should work, but it doesn't
         # if (existing_uuid := self.redis.set(f'query_hash:{hash_query}', temp_uuid, get=True, nx=True, ex=300)):
         if (existing_uuid := self.redis.get(f'query_hash:{hash_query}')):
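The switch from json.dumps to pickle.dumps in the deduplication hash above is presumably needed because a capture query can now carry the uploaded document as raw bytes, which the JSON encoder refuses to serialize. A minimal illustration with a hypothetical query:

    import hashlib
    import json
    import pickle

    query = {'document': b'<html>hello</html>', 'document_name': 'hello.html', 'listing': 0}

    try:
        hashlib.sha512(json.dumps(query).encode()).hexdigest()
    except TypeError as error:
        print(error)  # Object of type bytes is not JSON serializable

    # pickle serializes bytes natively, so the dedup hash keeps working
    print(hashlib.sha512(pickle.dumps(query)).hexdigest())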
File diff suppressed because it is too large

@@ -6,7 +6,9 @@ import json
 import logging
 import os
 import time
-import filetype
+
+import filetype  # type: ignore
+
 from datetime import date, datetime, timedelta, timezone
 from io import BytesIO, StringIO
 from typing import Any, Dict, List, Optional, Union, TypedDict
@@ -489,15 +491,19 @@ def image(tree_uuid: str):
     return send_file(to_return, mimetype='image/png',
                      as_attachment=True, attachment_filename='image.png')


 @app.route('/tree/<string:tree_uuid>/data', methods=['GET'])
 def data(tree_uuid: str):
     filename, data = lookyloo.get_data(tree_uuid)
-    if len(filename) != 0:
+    if len(filename) == 0:
         # TODO: return something saying it is not a valid request
         return

+    if filetype.guess_mime(data.getvalue()) is None:
+        mime = 'application/octet-stream'
+    else:
+        mime = filetype.guess_mime(data.getvalue())
-    return send_file(data, mimetype= mime,
+    return send_file(data, mimetype=mime,
                      as_attachment=True, attachment_filename=filename)


@@ -856,7 +862,11 @@ def capture_web():
     else:
         user = src_request_ip(request)

-    if request.method == 'POST' and (request.form.get('url') or request.form.get('urls')):
+    if request.method == 'POST':
+        if not (request.form.get('url') or request.form.get('urls') or 'document' in request.files):
+            flash('Invalid submission: please submit at least a URL or a document.', 'error')
+            return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
+
         capture_query: Dict[str, Union[str, bytes, int, bool]] = {}
         # check if the post request has the file part
         if 'cookies' in request.files and request.files['cookies'].filename:
@@ -902,7 +912,7 @@ def capture_web():
             perma_uuid = lookyloo.enqueue_capture(capture_query, source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
             time.sleep(2)
             return redirect(url_for('tree', tree_uuid=perma_uuid))
-        else:
+        elif request.form.get('urls'):
             # bulk query
             bulk_captures = []
             for url in request.form['urls'].split('\n'):
@@ -912,6 +922,13 @@ def capture_web():
                 bulk_captures.append((new_capture_uuid, url))

             return render_template('bulk_captures.html', bulk_captures=bulk_captures)
+        elif 'document' in request.files:
+            # File upload
+            capture_query['document'] = request.files['document'].stream.read()
+            capture_query['document_name'] = request.files['document'].filename
+            perma_uuid = lookyloo.enqueue_capture(capture_query, source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
+            time.sleep(2)
+            return redirect(url_for('tree', tree_uuid=perma_uuid))
     elif request.method == 'GET' and request.args.get('url'):
         url = unquote_plus(request.args['url']).strip()
         capture_query = {'url': url}

@@ -326,7 +326,9 @@ class CaptureCookies(Resource):
 # Just text

 submit_fields_post = api.model('SubmitFieldsPost', {
-    'url': fields.Url(description="The URL to capture", required=True),
+    'url': fields.Url(description="The URL to capture"),
+    'document': fields.String(description="A base64 encoded document, it can be anything a browser can display."),
+    'document_name': fields.String(description="The name of the document."),
     'listing': fields.Integer(description="Display the capture on the index", min=0, max=1, example=1),
     'user_agent': fields.String(description="User agent to use for the capture", example=''),
     'referer': fields.String(description="Referer to pass to the capture", example=''),
@@ -376,6 +378,8 @@ class SubmitCapture(Resource):
         else:
             user = src_request_ip(request)
         to_query: Dict = request.get_json(force=True)
+        if 'document' in to_query:
+            to_query['document'] = base64.b64decode(to_query['document'])
         perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
         return perma_uuid

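Taken together, the two hunks above let the REST API accept a file in place of a URL: 'url' is no longer a required field, and a submission may instead carry 'document' (base64-encoded) plus 'document_name', which the POST handler decodes before queuing the capture. A minimal client-side sketch (the instance URL is a placeholder, and the /submit path and the use of the requests library are assumptions, not part of this diff):

    import base64
    from pathlib import Path

    import requests  # third-party HTTP client, used here only for illustration

    instance = 'https://lookyloo.example.org'  # placeholder Lookyloo instance

    document = Path('page_to_capture.html').read_bytes()
    response = requests.post(f'{instance}/submit', json={
        'document': base64.b64encode(document).decode(),
        'document_name': 'page_to_capture.html',
        'listing': 0,
    })
    print(response.json())  # UUID of the enqueued capture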
@@ -39,12 +39,24 @@
       </div>
     </div>

+    <nav>
+      <div class="nav nav-tabs" id="submission-type" role="tablist">
+        <button class="nav-link active" id="nav-url-tab" data-bs-toggle="tab" data-bs-target="#nav-url" type="button" role="tab" aria-current="nav-url" aria-selected="true" href="#">URL(s)</button>
+        <button class="nav-link" id="nav-doc-tab" data-bs-toggle="tab" data-bs-target="#nav-doc" type="button" role="tab" aria-current="nav-doc" aria-selected="false" href="#">Document</button>
+      </div>
+    </nav>
+
+    <div class="tab-content" id="nav-tabContent">
+      </br>
+      <div class="tab-pane fade show active" id="nav-url" role="tabpanel" aria-labelledby="nav-url-tab">
         <div class="row input-group mb-3">
           <label for="url" class="col-sm-1 col-form-label">URL:</label>
           <input type="text" class="form-control col-auto" name="url" id=singleCaptureField
-                 placeholder="URL to capture" value="{{predefined_url_to_capture}}" required>
+                 placeholder="URL to capture" value="{{predefined_url_to_capture}}">
+
           <textarea class="form-control col-auto d-none" placeholder="URLs to capture, one per line"
                     name="urls" id=multipleCapturesField></textarea>
+
           <span class="col-sm-2 input-group-text">
             <div class="form-check">
               <input class="form-check-input" name="multipleCaptures" id="multipleCaptures" type="checkbox"
@@ -53,6 +65,19 @@
             </div>
           </span>
         </div>
+      </div>
+
+      <div class="tab-pane fade" id="nav-doc" role="tabpanel" aria-labelledby="nav-doc-tab">
+        <div class="row mb-3">
+          <label for="document" class="col-sm-1 col-form-label">Document:</label>
+          <div class="col-sm-10">
+            <input type="file" class="form-control-file" id="document" name="document">
+            <div>Instead of a URL, you can upload a file. Preferably an HTML document, but it can be anything supported by a browser.</div>
+          </div>
+        </div>
+      </div>
+    </div>
     <div class="dropdown-divider"></div>

     <div>
       <button class="btn btn-link" type="button" data-bs-toggle="collapse" data-bs-target="#collapseConfigBrowser"
@@ -228,16 +253,12 @@
     if (document.getElementById('multipleCaptures').checked == true) {
       document.getElementById('singleCaptureField').value = '';
       $("#singleCaptureField").addClass("d-none");
-      $("#singleCaptureField").removeAttr("required");
       $("#multipleCapturesField").removeClass("d-none");
-      $("#multipleCapturesField").attr("required", true);
     }
     else {
       document.getElementById('multipleCapturesField').value = '';
       $("#singleCaptureField").removeClass("d-none");
-      $("#singleCaptureField").attr("required", true);
       $("#multipleCapturesField").addClass("d-none");
-      $("#multipleCapturesField").removeAttr("required");
     }
   })
 </script>