mirror of https://github.com/CIRCL/lookyloo
new: Store capture settings, use TypedDict whenever possible.
parent
1603c99d5e
commit
582b5956e9
|
@ -7,12 +7,12 @@ import logging.config
|
||||||
import signal
|
import signal
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Optional, Set, Union
|
from typing import Optional, Set, Union
|
||||||
|
|
||||||
from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
|
from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
|
||||||
from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
|
from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
|
||||||
|
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo, CaptureSettings
|
||||||
from lookyloo.default import AbstractManager, get_config
|
from lookyloo.default import AbstractManager, get_config
|
||||||
from lookyloo.helpers import get_captures_dir
|
from lookyloo.helpers import get_captures_dir
|
||||||
|
|
||||||
|
@ -73,14 +73,14 @@ class AsyncCapture(AbstractManager):
|
||||||
self.lookyloo.redis.sadd('ongoing', uuid)
|
self.lookyloo.redis.sadd('ongoing', uuid)
|
||||||
queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
|
queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
|
||||||
|
|
||||||
to_capture: Dict[str, str] = self.lookyloo.redis.hgetall(uuid)
|
to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)
|
||||||
|
|
||||||
if get_config('generic', 'default_public'):
|
if get_config('generic', 'default_public'):
|
||||||
# By default, the captures are on the index, unless the user marks them as un-listed
|
# By default, the captures are on the index, unless the user marks them as un-listed
|
||||||
listing = False if ('listing' in to_capture and to_capture['listing'].lower() in ['false', '0', '']) else True
|
listing = False if ('listing' in to_capture and to_capture['listing'].lower() in ['false', '0', '']) else True # type: ignore
|
||||||
else:
|
else:
|
||||||
# By default, the captures are not on the index, unless the user marks them as listed
|
# By default, the captures are not on the index, unless the user marks them as listed
|
||||||
listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False
|
listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False # type: ignore
|
||||||
|
|
||||||
self.lookyloo.store_capture(
|
self.lookyloo.store_capture(
|
||||||
uuid, listing,
|
uuid, listing,
|
||||||
|
@ -91,11 +91,15 @@ class AsyncCapture(AbstractManager):
|
||||||
error=entries.get('error'), har=entries.get('har'),
|
error=entries.get('error'), har=entries.get('har'),
|
||||||
png=entries.get('png'), html=entries.get('html'),
|
png=entries.get('png'), html=entries.get('html'),
|
||||||
last_redirected_url=entries.get('last_redirected_url'),
|
last_redirected_url=entries.get('last_redirected_url'),
|
||||||
cookies=entries.get('cookies') # type: ignore
|
cookies=entries.get('cookies'),
|
||||||
|
capture_settings=to_capture
|
||||||
)
|
)
|
||||||
|
|
||||||
if ('auto_report' in to_capture):
|
if 'auto_report' in to_capture:
|
||||||
|
if isinstance(to_capture['auto_report'], str):
|
||||||
settings = json.loads(to_capture['auto_report'])
|
settings = json.loads(to_capture['auto_report'])
|
||||||
|
else:
|
||||||
|
settings = to_capture['auto_report']
|
||||||
if settings.get('email'):
|
if settings.get('email'):
|
||||||
self.lookyloo.send_mail(uuid, email=settings['email'],
|
self.lookyloo.send_mail(uuid, email=settings['email'],
|
||||||
comment=settings.get('comment'))
|
comment=settings.get('comment'))
|
||||||
|
|
|
@ -151,7 +151,7 @@ def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, An
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
|
|
||||||
def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes]]=None) -> List[Dict[str, Union[str, bool]]]:
|
def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes, List[Dict[str, Union[str, bool]]]]]=None) -> List[Dict[str, Union[str, bool]]]:
|
||||||
cookies: List[Dict[str, Union[str, bool]]]
|
cookies: List[Dict[str, Union[str, bool]]]
|
||||||
if cookie_pseudofile:
|
if cookie_pseudofile:
|
||||||
if isinstance(cookie_pseudofile, (str, bytes)):
|
if isinstance(cookie_pseudofile, (str, bytes)):
|
||||||
|
@ -160,13 +160,16 @@ def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes]]=
|
||||||
except json.decoder.JSONDecodeError:
|
except json.decoder.JSONDecodeError:
|
||||||
logger.warning(f'Unable to load json content: {cookie_pseudofile!r}')
|
logger.warning(f'Unable to load json content: {cookie_pseudofile!r}')
|
||||||
return []
|
return []
|
||||||
else:
|
elif isinstance(cookie_pseudofile, BufferedIOBase):
|
||||||
# Note: we might have an empty BytesIO, which is not False.
|
# Note: we might have an empty BytesIO, which is not False.
|
||||||
try:
|
try:
|
||||||
cookies = json.load(cookie_pseudofile)
|
cookies = json.load(cookie_pseudofile)
|
||||||
except json.decoder.JSONDecodeError:
|
except json.decoder.JSONDecodeError:
|
||||||
logger.warning(f'Unable to load json content: {cookie_pseudofile}')
|
logger.warning(f'Unable to load json content: {cookie_pseudofile}')
|
||||||
return []
|
return []
|
||||||
|
else:
|
||||||
|
# Already a list of cookie dicts
|
||||||
|
cookies = cookie_pseudofile
|
||||||
else:
|
else:
|
||||||
if not (get_homedir() / 'cookies.json').exists():
|
if not (get_homedir() / 'cookies.json').exists():
|
||||||
return []
|
return []
|
||||||
|
|
|
@ -12,8 +12,7 @@ from email.message import EmailMessage
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Set,
|
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, TYPE_CHECKING
|
||||||
Tuple, Union)
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
@ -21,17 +20,18 @@ from zipfile import ZipFile
|
||||||
from defang import defang # type: ignore
|
from defang import defang # type: ignore
|
||||||
from har2tree import CrawledTree, HostNode, URLNode
|
from har2tree import CrawledTree, HostNode, URLNode
|
||||||
from lacuscore import (LacusCore,
|
from lacuscore import (LacusCore,
|
||||||
CaptureStatus as CaptureStatusCore)
|
CaptureStatus as CaptureStatusCore,
|
||||||
# CaptureResponse as CaptureResponseCore,
|
# CaptureResponse as CaptureResponseCore)
|
||||||
# CaptureResponseJson as CaptureResponseJsonCore,
|
# CaptureResponseJson as CaptureResponseJsonCore,
|
||||||
# CaptureSettings as CaptureSettingsCore)
|
CaptureSettings as CaptureSettingsCore)
|
||||||
from PIL import Image, UnidentifiedImageError
|
from PIL import Image, UnidentifiedImageError
|
||||||
from playwrightcapture import get_devices
|
from playwrightcapture import get_devices
|
||||||
from pylacus import (PyLacus,
|
from pylacus import (PyLacus,
|
||||||
CaptureStatus as CaptureStatusPy)
|
CaptureStatus as CaptureStatusPy
|
||||||
# CaptureResponse as CaptureResponsePy,
|
# CaptureResponse as CaptureResponsePy,
|
||||||
# CaptureResponseJson as CaptureResponseJsonPy,
|
# CaptureResponseJson as CaptureResponseJsonPy,
|
||||||
# CaptureSettings as CaptureSettingsPy)
|
# CaptureSettings as CaptureSettingsPy
|
||||||
|
)
|
||||||
from pymisp import MISPAttribute, MISPEvent, MISPObject
|
from pymisp import MISPAttribute, MISPEvent, MISPObject
|
||||||
from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable
|
from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable
|
||||||
from pylookyloomonitoring import PyLookylooMonitoring
|
from pylookyloomonitoring import PyLookylooMonitoring
|
||||||
|
@ -52,6 +52,20 @@ from .modules import (MISP, PhishingInitiative, UniversalWhois,
|
||||||
UrlScan, VirusTotal, Phishtank, Hashlookup,
|
UrlScan, VirusTotal, Phishtank, Hashlookup,
|
||||||
RiskIQ, RiskIQError, Pandora, URLhaus)
|
RiskIQ, RiskIQError, Pandora, URLhaus)
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from playwright.async_api import Cookie
|
||||||
|
|
||||||
|
|
||||||
|
class CaptureSettings(CaptureSettingsCore, total=False):
|
||||||
|
'''The capture settings that can be passed to Lookyloo'''
|
||||||
|
listing: Optional[int]
|
||||||
|
not_queued: Optional[int]
|
||||||
|
auto_report: Optional[Union[str, Dict[str, str]]]
|
||||||
|
dnt: Optional[str]
|
||||||
|
browser_name: Optional[str]
|
||||||
|
os: Optional[str]
|
||||||
|
parent: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
class Lookyloo():
|
class Lookyloo():
|
||||||
|
|
||||||
|
@ -499,13 +513,20 @@ class Lookyloo():
|
||||||
self._captures_index.reload_cache(capture_uuid)
|
self._captures_index.reload_cache(capture_uuid)
|
||||||
return self._captures_index[capture_uuid].tree
|
return self._captures_index[capture_uuid].tree
|
||||||
|
|
||||||
def _prepare_lacus_query(self, query: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
|
def _prepare_lacus_query(self, query: CaptureSettings) -> CaptureSettings:
|
||||||
query = {k: v for k, v in query.items() if v is not None} # Remove the none, it makes redis unhappy
|
# Remove the None values, they make redis unhappy
|
||||||
|
query = {k: v for k, v in query.items() if v is not None} # type: ignore
|
||||||
# NOTE: Lookyloo's capture can pass a Do Not Track (DNT) header independently from the default headers, merging it here
|
# NOTE: Lookyloo's capture can pass a Do Not Track (DNT) header independently from the default headers, merging it here
|
||||||
headers = query.pop('headers', '')
|
headers = query.pop('headers', {})
|
||||||
if 'dnt' in query:
|
if 'dnt' in query:
|
||||||
|
if isinstance(headers, str):
|
||||||
headers += f'\nDNT: {query.pop("dnt")}'
|
headers += f'\nDNT: {query.pop("dnt")}'
|
||||||
headers = headers.strip()
|
headers = headers.strip()
|
||||||
|
elif isinstance(headers, dict):
|
||||||
|
dnt_entry = query.pop("dnt")
|
||||||
|
if dnt_entry:
|
||||||
|
headers['DNT'] = dnt_entry.strip()
|
||||||
|
|
||||||
if headers:
|
if headers:
|
||||||
query['headers'] = headers
|
query['headers'] = headers
|
||||||
|
|
||||||
|
@ -521,7 +542,7 @@ class Lookyloo():
|
||||||
query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent']
|
query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent']
|
||||||
|
|
||||||
# NOTE: the document must be base64 encoded
|
# NOTE: the document must be base64 encoded
|
||||||
document = query.pop('document', None)
|
document: Optional[Union[str, bytes]] = query.pop('document', None)
|
||||||
if document:
|
if document:
|
||||||
if isinstance(document, bytes):
|
if isinstance(document, bytes):
|
||||||
query['document'] = base64.b64encode(document).decode()
|
query['document'] = base64.b64encode(document).decode()
|
||||||
|
@ -529,7 +550,7 @@ class Lookyloo():
|
||||||
query['document'] = document
|
query['document'] = document
|
||||||
return query
|
return query
|
||||||
|
|
||||||
def enqueue_capture(self, query: MutableMapping[str, Any], source: str, user: str, authenticated: bool) -> str:
|
def enqueue_capture(self, query: CaptureSettings, source: str, user: str, authenticated: bool) -> str:
|
||||||
'''Enqueue a query in the capture queue (used by the UI and the API for asynchronous processing)'''
|
'''Enqueue a query in the capture queue (used by the UI and the API for asynchronous processing)'''
|
||||||
|
|
||||||
def get_priority(source: str, user: str, authenticated: bool) -> int:
|
def get_priority(source: str, user: str, authenticated: bool) -> int:
|
||||||
|
@ -547,14 +568,15 @@ class Lookyloo():
|
||||||
|
|
||||||
for key, value in query.items():
|
for key, value in query.items():
|
||||||
if isinstance(value, bool):
|
if isinstance(value, bool):
|
||||||
query[key] = 1 if value else 0
|
query[key] = 1 if value else 0 # type: ignore
|
||||||
elif isinstance(value, (list, dict)):
|
elif isinstance(value, (list, dict)):
|
||||||
query[key] = json.dumps(value) if value else None
|
query[key] = json.dumps(value) if value else None # type: ignore
|
||||||
|
|
||||||
query = self._prepare_lacus_query(query)
|
query = self._prepare_lacus_query(query)
|
||||||
|
|
||||||
query['priority'] = get_priority(source, user, authenticated)
|
priority = get_priority(source, user, authenticated)
|
||||||
if query['priority'] < -10:
|
query['priority'] = priority
|
||||||
|
if priority < -10:
|
||||||
# Someone is probably abusing the system with useless URLs, remove them from the index
|
# Someone is probably abusing the system with useless URLs, remove them from the index
|
||||||
query['listing'] = 0
|
query['listing'] = 0
|
||||||
try:
|
try:
|
||||||
|
@ -595,7 +617,7 @@ class Lookyloo():
|
||||||
if value:
|
if value:
|
||||||
mapping_capture[key] = json.dumps(value)
|
mapping_capture[key] = json.dumps(value)
|
||||||
elif value is not None:
|
elif value is not None:
|
||||||
mapping_capture[key] = value
|
mapping_capture[key] = value # type: ignore
|
||||||
|
|
||||||
p = self.redis.pipeline()
|
p = self.redis.pipeline()
|
||||||
p.zadd('to_capture', {perma_uuid: query['priority']})
|
p.zadd('to_capture', {perma_uuid: query['priority']})
|
||||||
|
@ -1323,7 +1345,8 @@ class Lookyloo():
|
||||||
error: Optional[str]=None, har: Optional[Dict[str, Any]]=None,
|
error: Optional[str]=None, har: Optional[Dict[str, Any]]=None,
|
||||||
png: Optional[bytes]=None, html: Optional[str]=None,
|
png: Optional[bytes]=None, html: Optional[str]=None,
|
||||||
last_redirected_url: Optional[str]=None,
|
last_redirected_url: Optional[str]=None,
|
||||||
cookies: Optional[List[Dict[str, str]]]=None
|
cookies: Optional[Union[List['Cookie'], List[Dict[str, str]]]]=None,
|
||||||
|
capture_settings: Optional[CaptureSettings]=None
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
|
@ -1383,4 +1406,9 @@ class Lookyloo():
|
||||||
if cookies:
|
if cookies:
|
||||||
with (dirpath / '0.cookies.json').open('w') as _cookies:
|
with (dirpath / '0.cookies.json').open('w') as _cookies:
|
||||||
json.dump(cookies, _cookies)
|
json.dump(cookies, _cookies)
|
||||||
|
|
||||||
|
if capture_settings:
|
||||||
|
with (dirpath / 'capture_settings.json').open('w') as _cs:
|
||||||
|
json.dump(capture_settings, _cs)
|
||||||
|
|
||||||
self.redis.hset('lookup_dirs', uuid, str(dirpath))
|
self.redis.hset('lookup_dirs', uuid, str(dirpath))
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import base64
|
||||||
import calendar
|
import calendar
|
||||||
import functools
|
import functools
|
||||||
import http
|
import http
|
||||||
|
@ -32,7 +33,7 @@ from werkzeug.security import check_password_hash
|
||||||
from lookyloo.default import get_config
|
from lookyloo.default import get_config
|
||||||
from lookyloo.exceptions import MissingUUID, NoValidHarFile
|
from lookyloo.exceptions import MissingUUID, NoValidHarFile
|
||||||
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
|
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
|
||||||
from lookyloo.lookyloo import Indexing, Lookyloo
|
from lookyloo.lookyloo import Indexing, Lookyloo, CaptureSettings
|
||||||
|
|
||||||
from .genericapi import api as generic_api
|
from .genericapi import api as generic_api
|
||||||
from .helpers import (User, build_users_table, get_secret_key,
|
from .helpers import (User, build_users_table, get_secret_key,
|
||||||
|
@ -615,7 +616,8 @@ def bulk_captures(base_tree_uuid: str):
|
||||||
cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
|
cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
|
||||||
bulk_captures = []
|
bulk_captures = []
|
||||||
for url in [urls[int(selected_id) - 1] for selected_id in selected_urls]:
|
for url in [urls[int(selected_id) - 1] for selected_id in selected_urls]:
|
||||||
capture = {'url': url,
|
capture: CaptureSettings = {
|
||||||
|
'url': url,
|
||||||
'cookies': cookies,
|
'cookies': cookies,
|
||||||
'referer': cache.redirects[-1] if cache.redirects else cache.url,
|
'referer': cache.redirects[-1] if cache.redirects else cache.url,
|
||||||
'user_agent': cache.user_agent,
|
'user_agent': cache.user_agent,
|
||||||
|
@ -1036,10 +1038,10 @@ def capture_web():
|
||||||
flash('Invalid submission: please submit at least a URL or a document.', 'error')
|
flash('Invalid submission: please submit at least a URL or a document.', 'error')
|
||||||
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
|
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
|
||||||
|
|
||||||
capture_query: Dict[str, Union[str, bytes, int, bool]] = {}
|
capture_query: CaptureSettings = {}
|
||||||
# check if the post request has the file part
|
# check if the post request has the file part
|
||||||
if 'cookies' in request.files and request.files['cookies'].filename:
|
if 'cookies' in request.files and request.files['cookies'].filename:
|
||||||
capture_query['cookies'] = request.files['cookies'].stream.read()
|
capture_query['cookies'] = load_cookies(request.files['cookies'].stream.read())
|
||||||
|
|
||||||
if request.form.get('device_name'):
|
if request.form.get('device_name'):
|
||||||
capture_query['device_name'] = request.form['device_name']
|
capture_query['device_name'] = request.form['device_name']
|
||||||
|
@ -1095,7 +1097,7 @@ def capture_web():
|
||||||
return render_template('bulk_captures.html', bulk_captures=bulk_captures)
|
return render_template('bulk_captures.html', bulk_captures=bulk_captures)
|
||||||
elif 'document' in request.files:
|
elif 'document' in request.files:
|
||||||
# File upload
|
# File upload
|
||||||
capture_query['document'] = request.files['document'].stream.read()
|
capture_query['document'] = base64.b64encode(request.files['document'].stream.read()).decode()
|
||||||
if request.files['document'].filename:
|
if request.files['document'].filename:
|
||||||
capture_query['document_name'] = request.files['document'].filename
|
capture_query['document_name'] = request.files['document'].filename
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -14,7 +14,7 @@ from lacuscore import CaptureStatus as CaptureStatusCore
|
||||||
from pylacus import CaptureStatus as CaptureStatusPy
|
from pylacus import CaptureStatus as CaptureStatusPy
|
||||||
from lookyloo.comparator import Comparator
|
from lookyloo.comparator import Comparator
|
||||||
from lookyloo.exceptions import MissingUUID
|
from lookyloo.exceptions import MissingUUID
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo, CaptureSettings
|
||||||
|
|
||||||
from .helpers import build_users_table, load_user_from_request, src_request_ip
|
from .helpers import build_users_table, load_user_from_request, src_request_ip
|
||||||
|
|
||||||
|
@ -396,7 +396,8 @@ class SubmitCapture(Resource):
|
||||||
if 'url' not in request.args or not request.args.get('url'):
|
if 'url' not in request.args or not request.args.get('url'):
|
||||||
return 'No "url" in the URL params, nothting to capture.', 400
|
return 'No "url" in the URL params, nothting to capture.', 400
|
||||||
|
|
||||||
to_query = {'url': request.args['url'],
|
to_query: CaptureSettings = {
|
||||||
|
'url': request.args['url'],
|
||||||
'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True}
|
'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True}
|
||||||
if request.args.get('user_agent'):
|
if request.args.get('user_agent'):
|
||||||
to_query['user_agent'] = request.args['user_agent']
|
to_query['user_agent'] = request.args['user_agent']
|
||||||
|
@ -421,7 +422,7 @@ class SubmitCapture(Resource):
|
||||||
user = flask_login.current_user.get_id()
|
user = flask_login.current_user.get_id()
|
||||||
else:
|
else:
|
||||||
user = src_request_ip(request)
|
user = src_request_ip(request)
|
||||||
to_query: Dict = request.get_json(force=True)
|
to_query: CaptureSettings = request.get_json(force=True)
|
||||||
perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
|
perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
|
||||||
return perma_uuid
|
return perma_uuid
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue