new: Store capture settings, use TypedDict whenever possible.

pull/699/head
Raphaël Vinot 2023-05-15 16:08:19 +02:00
parent 1603c99d5e
commit 582b5956e9
5 changed files with 86 additions and 48 deletions

View File

@ -7,12 +7,12 @@ import logging.config
import signal import signal
from pathlib import Path from pathlib import Path
from typing import Dict, Optional, Set, Union from typing import Optional, Set, Union
from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
from lookyloo.lookyloo import Lookyloo from lookyloo.lookyloo import Lookyloo, CaptureSettings
from lookyloo.default import AbstractManager, get_config from lookyloo.default import AbstractManager, get_config
from lookyloo.helpers import get_captures_dir from lookyloo.helpers import get_captures_dir
@ -73,14 +73,14 @@ class AsyncCapture(AbstractManager):
self.lookyloo.redis.sadd('ongoing', uuid) self.lookyloo.redis.sadd('ongoing', uuid)
queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt') queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
to_capture: Dict[str, str] = self.lookyloo.redis.hgetall(uuid) to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)
if get_config('generic', 'default_public'): if get_config('generic', 'default_public'):
# By default, the captures are on the index, unless the user mark them as un-listed # By default, the captures are on the index, unless the user mark them as un-listed
listing = False if ('listing' in to_capture and to_capture['listing'].lower() in ['false', '0', '']) else True listing = False if ('listing' in to_capture and to_capture['listing'].lower() in ['false', '0', '']) else True # type: ignore
else: else:
# By default, the captures are not on the index, unless the user mark them as listed # By default, the captures are not on the index, unless the user mark them as listed
listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False # type: ignore
self.lookyloo.store_capture( self.lookyloo.store_capture(
uuid, listing, uuid, listing,
@ -91,11 +91,15 @@ class AsyncCapture(AbstractManager):
error=entries.get('error'), har=entries.get('har'), error=entries.get('error'), har=entries.get('har'),
png=entries.get('png'), html=entries.get('html'), png=entries.get('png'), html=entries.get('html'),
last_redirected_url=entries.get('last_redirected_url'), last_redirected_url=entries.get('last_redirected_url'),
cookies=entries.get('cookies') # type: ignore cookies=entries.get('cookies'),
capture_settings=to_capture
) )
if ('auto_report' in to_capture): if 'auto_report' in to_capture:
settings = json.loads(to_capture['auto_report']) if isinstance(to_capture['auto_report'], str):
settings = json.loads(to_capture['auto_report'])
else:
settings = to_capture['auto_report']
if settings.get('email'): if settings.get('email'):
self.lookyloo.send_mail(uuid, email=settings['email'], self.lookyloo.send_mail(uuid, email=settings['email'],
comment=settings.get('comment')) comment=settings.get('comment'))

View File

@ -151,7 +151,7 @@ def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, An
return to_return return to_return
def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes]]=None) -> List[Dict[str, Union[str, bool]]]: def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes, List[Dict[str, Union[str, bool]]]]]=None) -> List[Dict[str, Union[str, bool]]]:
cookies: List[Dict[str, Union[str, bool]]] cookies: List[Dict[str, Union[str, bool]]]
if cookie_pseudofile: if cookie_pseudofile:
if isinstance(cookie_pseudofile, (str, bytes)): if isinstance(cookie_pseudofile, (str, bytes)):
@ -160,13 +160,16 @@ def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes]]=
except json.decoder.JSONDecodeError: except json.decoder.JSONDecodeError:
logger.warning(f'Unable to load json content: {cookie_pseudofile!r}') logger.warning(f'Unable to load json content: {cookie_pseudofile!r}')
return [] return []
else: elif isinstance(cookie_pseudofile, BufferedIOBase):
# Note: we might have an empty BytesIO, which is not False. # Note: we might have an empty BytesIO, which is not False.
try: try:
cookies = json.load(cookie_pseudofile) cookies = json.load(cookie_pseudofile)
except json.decoder.JSONDecodeError: except json.decoder.JSONDecodeError:
logger.warning(f'Unable to load json content: {cookie_pseudofile}') logger.warning(f'Unable to load json content: {cookie_pseudofile}')
return [] return []
else:
# Already a dict
cookies = cookie_pseudofile
else: else:
if not (get_homedir() / 'cookies.json').exists(): if not (get_homedir() / 'cookies.json').exists():
return [] return []

View File

@ -12,8 +12,7 @@ from email.message import EmailMessage
from functools import cached_property from functools import cached_property
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Set, from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union, TYPE_CHECKING
Tuple, Union)
from urllib.parse import urlparse from urllib.parse import urlparse
from uuid import uuid4 from uuid import uuid4
from zipfile import ZipFile from zipfile import ZipFile
@ -21,17 +20,18 @@ from zipfile import ZipFile
from defang import defang # type: ignore from defang import defang # type: ignore
from har2tree import CrawledTree, HostNode, URLNode from har2tree import CrawledTree, HostNode, URLNode
from lacuscore import (LacusCore, from lacuscore import (LacusCore,
CaptureStatus as CaptureStatusCore) CaptureStatus as CaptureStatusCore,
# CaptureResponse as CaptureResponseCore, # CaptureResponse as CaptureResponseCore)
# CaptureResponseJson as CaptureResponseJsonCore, # CaptureResponseJson as CaptureResponseJsonCore,
# CaptureSettings as CaptureSettingsCore) CaptureSettings as CaptureSettingsCore)
from PIL import Image, UnidentifiedImageError from PIL import Image, UnidentifiedImageError
from playwrightcapture import get_devices from playwrightcapture import get_devices
from pylacus import (PyLacus, from pylacus import (PyLacus,
CaptureStatus as CaptureStatusPy) CaptureStatus as CaptureStatusPy
# CaptureResponse as CaptureResponsePy, # CaptureResponse as CaptureResponsePy,
# CaptureResponseJson as CaptureResponseJsonPy, # CaptureResponseJson as CaptureResponseJsonPy,
# CaptureSettings as CaptureSettingsPy) # CaptureSettings as CaptureSettingsPy
)
from pymisp import MISPAttribute, MISPEvent, MISPObject from pymisp import MISPAttribute, MISPEvent, MISPObject
from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable
from pylookyloomonitoring import PyLookylooMonitoring from pylookyloomonitoring import PyLookylooMonitoring
@ -52,6 +52,20 @@ from .modules import (MISP, PhishingInitiative, UniversalWhois,
UrlScan, VirusTotal, Phishtank, Hashlookup, UrlScan, VirusTotal, Phishtank, Hashlookup,
RiskIQ, RiskIQError, Pandora, URLhaus) RiskIQ, RiskIQError, Pandora, URLhaus)
if TYPE_CHECKING:
from playwright.async_api import Cookie
class CaptureSettings(CaptureSettingsCore, total=False):
'''The capture settings that can be passed to Lookyloo'''
listing: Optional[int]
not_queued: Optional[int]
auto_report: Optional[Union[str, Dict[str, str]]]
dnt: Optional[str]
browser_name: Optional[str]
os: Optional[str]
parent: Optional[str]
class Lookyloo(): class Lookyloo():
@ -499,13 +513,20 @@ class Lookyloo():
self._captures_index.reload_cache(capture_uuid) self._captures_index.reload_cache(capture_uuid)
return self._captures_index[capture_uuid].tree return self._captures_index[capture_uuid].tree
def _prepare_lacus_query(self, query: MutableMapping[str, Any]) -> MutableMapping[str, Any]: def _prepare_lacus_query(self, query: CaptureSettings) -> CaptureSettings:
query = {k: v for k, v in query.items() if v is not None} # Remove the none, it makes redis unhappy # Remove the none, it makes redis unhappy
query = {k: v for k, v in query.items() if v is not None} # type: ignore
# NOTE: Lookyloo' capture can pass a do not track header independently from the default headers, merging it here # NOTE: Lookyloo' capture can pass a do not track header independently from the default headers, merging it here
headers = query.pop('headers', '') headers = query.pop('headers', {})
if 'dnt' in query: if 'dnt' in query:
headers += f'\nDNT: {query.pop("dnt")}' if isinstance(headers, str):
headers = headers.strip() headers += f'\nDNT: {query.pop("dnt")}'
headers = headers.strip()
elif isinstance(headers, dict):
dnt_entry = query.pop("dnt")
if dnt_entry:
headers['DNT'] = dnt_entry.strip()
if headers: if headers:
query['headers'] = headers query['headers'] = headers
@ -521,7 +542,7 @@ class Lookyloo():
query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent'] query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent']
# NOTE: the document must be base64 encoded # NOTE: the document must be base64 encoded
document = query.pop('document', None) document: Optional[Union[str, bytes]] = query.pop('document', None)
if document: if document:
if isinstance(document, bytes): if isinstance(document, bytes):
query['document'] = base64.b64encode(document).decode() query['document'] = base64.b64encode(document).decode()
@ -529,7 +550,7 @@ class Lookyloo():
query['document'] = document query['document'] = document
return query return query
def enqueue_capture(self, query: MutableMapping[str, Any], source: str, user: str, authenticated: bool) -> str: def enqueue_capture(self, query: CaptureSettings, source: str, user: str, authenticated: bool) -> str:
'''Enqueue a query in the capture queue (used by the UI and the API for asynchronous processing)''' '''Enqueue a query in the capture queue (used by the UI and the API for asynchronous processing)'''
def get_priority(source: str, user: str, authenticated: bool) -> int: def get_priority(source: str, user: str, authenticated: bool) -> int:
@ -547,14 +568,15 @@ class Lookyloo():
for key, value in query.items(): for key, value in query.items():
if isinstance(value, bool): if isinstance(value, bool):
query[key] = 1 if value else 0 query[key] = 1 if value else 0 # type: ignore
elif isinstance(value, (list, dict)): elif isinstance(value, (list, dict)):
query[key] = json.dumps(value) if value else None query[key] = json.dumps(value) if value else None # type: ignore
query = self._prepare_lacus_query(query) query = self._prepare_lacus_query(query)
query['priority'] = get_priority(source, user, authenticated) priority = get_priority(source, user, authenticated)
if query['priority'] < -10: query['priority'] = priority
if priority < -10:
# Someone is probably abusing the system with useless URLs, remove them from the index # Someone is probably abusing the system with useless URLs, remove them from the index
query['listing'] = 0 query['listing'] = 0
try: try:
@ -595,7 +617,7 @@ class Lookyloo():
if value: if value:
mapping_capture[key] = json.dumps(value) mapping_capture[key] = json.dumps(value)
elif value is not None: elif value is not None:
mapping_capture[key] = value mapping_capture[key] = value # type: ignore
p = self.redis.pipeline() p = self.redis.pipeline()
p.zadd('to_capture', {perma_uuid: query['priority']}) p.zadd('to_capture', {perma_uuid: query['priority']})
@ -1323,7 +1345,8 @@ class Lookyloo():
error: Optional[str]=None, har: Optional[Dict[str, Any]]=None, error: Optional[str]=None, har: Optional[Dict[str, Any]]=None,
png: Optional[bytes]=None, html: Optional[str]=None, png: Optional[bytes]=None, html: Optional[str]=None,
last_redirected_url: Optional[str]=None, last_redirected_url: Optional[str]=None,
cookies: Optional[List[Dict[str, str]]]=None cookies: Optional[Union[List['Cookie'], List[Dict[str, str]]]]=None,
capture_settings: Optional[CaptureSettings]=None
) -> None: ) -> None:
now = datetime.now() now = datetime.now()
@ -1383,4 +1406,9 @@ class Lookyloo():
if cookies: if cookies:
with (dirpath / '0.cookies.json').open('w') as _cookies: with (dirpath / '0.cookies.json').open('w') as _cookies:
json.dump(cookies, _cookies) json.dump(cookies, _cookies)
if capture_settings:
with (dirpath / 'capture_settings.json').open('w') as _cs:
json.dump(capture_settings, _cs)
self.redis.hset('lookup_dirs', uuid, str(dirpath)) self.redis.hset('lookup_dirs', uuid, str(dirpath))

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import base64
import calendar import calendar
import functools import functools
import http import http
@ -32,7 +33,7 @@ from werkzeug.security import check_password_hash
from lookyloo.default import get_config from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
from lookyloo.lookyloo import Indexing, Lookyloo from lookyloo.lookyloo import Indexing, Lookyloo, CaptureSettings
from .genericapi import api as generic_api from .genericapi import api as generic_api
from .helpers import (User, build_users_table, get_secret_key, from .helpers import (User, build_users_table, get_secret_key,
@ -615,13 +616,14 @@ def bulk_captures(base_tree_uuid: str):
cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid)) cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
bulk_captures = [] bulk_captures = []
for url in [urls[int(selected_id) - 1] for selected_id in selected_urls]: for url in [urls[int(selected_id) - 1] for selected_id in selected_urls]:
capture = {'url': url, capture: CaptureSettings = {
'cookies': cookies, 'url': url,
'referer': cache.redirects[-1] if cache.redirects else cache.url, 'cookies': cookies,
'user_agent': cache.user_agent, 'referer': cache.redirects[-1] if cache.redirects else cache.url,
'parent': base_tree_uuid, 'user_agent': cache.user_agent,
'listing': False if cache and cache.no_index else True 'parent': base_tree_uuid,
} 'listing': False if cache and cache.no_index else True
}
new_capture_uuid = lookyloo.enqueue_capture(capture, source='web', user=user, authenticated=flask_login.current_user.is_authenticated) new_capture_uuid = lookyloo.enqueue_capture(capture, source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
bulk_captures.append((new_capture_uuid, url)) bulk_captures.append((new_capture_uuid, url))
@ -1036,10 +1038,10 @@ def capture_web():
flash('Invalid submission: please submit at least a URL or a document.', 'error') flash('Invalid submission: please submit at least a URL or a document.', 'error')
return _prepare_capture_template(user_ua=request.headers.get('User-Agent')) return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
capture_query: Dict[str, Union[str, bytes, int, bool]] = {} capture_query: CaptureSettings = {}
# check if the post request has the file part # check if the post request has the file part
if 'cookies' in request.files and request.files['cookies'].filename: if 'cookies' in request.files and request.files['cookies'].filename:
capture_query['cookies'] = request.files['cookies'].stream.read() capture_query['cookies'] = load_cookies(request.files['cookies'].stream.read())
if request.form.get('device_name'): if request.form.get('device_name'):
capture_query['device_name'] = request.form['device_name'] capture_query['device_name'] = request.form['device_name']
@ -1095,7 +1097,7 @@ def capture_web():
return render_template('bulk_captures.html', bulk_captures=bulk_captures) return render_template('bulk_captures.html', bulk_captures=bulk_captures)
elif 'document' in request.files: elif 'document' in request.files:
# File upload # File upload
capture_query['document'] = request.files['document'].stream.read() capture_query['document'] = base64.b64encode(request.files['document'].stream.read()).decode()
if request.files['document'].filename: if request.files['document'].filename:
capture_query['document_name'] = request.files['document'].filename capture_query['document_name'] = request.files['document'].filename
else: else:

View File

@ -14,7 +14,7 @@ from lacuscore import CaptureStatus as CaptureStatusCore
from pylacus import CaptureStatus as CaptureStatusPy from pylacus import CaptureStatus as CaptureStatusPy
from lookyloo.comparator import Comparator from lookyloo.comparator import Comparator
from lookyloo.exceptions import MissingUUID from lookyloo.exceptions import MissingUUID
from lookyloo.lookyloo import Lookyloo from lookyloo.lookyloo import Lookyloo, CaptureSettings
from .helpers import build_users_table, load_user_from_request, src_request_ip from .helpers import build_users_table, load_user_from_request, src_request_ip
@ -396,8 +396,9 @@ class SubmitCapture(Resource):
if 'url' not in request.args or not request.args.get('url'): if 'url' not in request.args or not request.args.get('url'):
return 'No "url" in the URL params, nothting to capture.', 400 return 'No "url" in the URL params, nothting to capture.', 400
to_query = {'url': request.args['url'], to_query: CaptureSettings = {
'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True} 'url': request.args['url'],
'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True}
if request.args.get('user_agent'): if request.args.get('user_agent'):
to_query['user_agent'] = request.args['user_agent'] to_query['user_agent'] = request.args['user_agent']
if request.args.get('browser_name'): if request.args.get('browser_name'):
@ -421,7 +422,7 @@ class SubmitCapture(Resource):
user = flask_login.current_user.get_id() user = flask_login.current_user.get_id()
else: else:
user = src_request_ip(request) user = src_request_ip(request)
to_query: Dict = request.get_json(force=True) to_query: CaptureSettings = request.get_json(force=True)
perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated) perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
return perma_uuid return perma_uuid