chg: Use new annotations

pull/862/head
Raphaël Vinot 2024-01-12 17:15:41 +01:00
parent 0b5128e5b4
commit ee1ad48b25
49 changed files with 749 additions and 657 deletions
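
The pattern repeated across every file below is the same three-step modernisation: add "from __future__ import annotations" (PEP 563) so annotations are no longer evaluated at runtime, then spell unions as X | None (PEP 604) and generics with the built-in containers (PEP 585) instead of typing.Optional, typing.Dict and friends. A minimal sketch of the before/after, with illustrative names that are not from this commit:

    from __future__ import annotations  # annotations become lazy strings (PEP 563)


    def lookup(index: dict[str, str], key: str) -> str | None:
        # dict[str, str] (PEP 585) and str | None (PEP 604) are accepted on
        # Python 3.8 because the annotation is never evaluated at runtime.
        return index.get(key)


    # The spelling this commit removes:
    # from typing import Dict, Optional
    # def lookup(index: Dict[str, str], key: str) -> Optional[str]: ...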

View File

@@ -3,14 +3,14 @@
 exclude: "user_agents|website/web/sri.txt"
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
   - repo: https://github.com/asottile/pyupgrade
-    rev: v2.31.1
+    rev: v3.15.0
    hooks:
       - id: pyupgrade
        args: [--py38-plus]
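
The two hook bumps keep that modernisation automated: pyupgrade with --py38-plus rewrites typing.Optional/Union to the | syntax and List/Dict/Set/Tuple to the built-ins, but (to stay 3.8-compatible) only in files that already carry "from __future__ import annotations", which is exactly the import the commit adds to each module below. A hedged sketch of the rewrite, on a made-up function:

    # Before (input pyupgrade will rewrite once the future import is present):
    from __future__ import annotations
    from typing import Dict, Optional

    def get_count(d: Optional[Dict[str, int]]) -> Optional[int]:
        return d.get('count') if d else None

    # After pyupgrade --py38-plus (the typing import then becomes unused):
    # def get_count(d: dict[str, int] | None) -> int | None: ...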

View File

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import csv
 import gzip
 import logging
@@ -23,7 +25,7 @@ logging.config.dictConfig(get_config('logging'))
 class Archiver(AbstractManager):

-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'archiver'
         self.redis = Redis(unix_socket_path=get_socket_path('cache'))
@@ -54,7 +56,7 @@ class Archiver(AbstractManager):
         self.s3fs_bucket = s3fs_config['config']['bucket_name']
         self.s3fs_client.clear_multipart_uploads(self.s3fs_bucket)

-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
         archiving_done = False
         # NOTE: When we archive a big directory, moving *a lot* of files, expecially to MinIO
         # can take a very long time. In order to avoid being stuck on the archiving, we break that in chunks
@@ -71,14 +73,14 @@ class Archiver(AbstractManager):
         # This call takes a very long time on MinIO
         self._update_all_capture_indexes()

-    def _update_index(self, root_dir: Path, *, s3fs_parent_dir: Optional[str]=None) -> Optional[Path]:
+    def _update_index(self, root_dir: Path, *, s3fs_parent_dir: str | None=None) -> Path | None:
         # returns a path to the index for the given directory
         logmsg = f'Updating index for {root_dir}'
         if s3fs_parent_dir:
             logmsg = f'{logmsg} (s3fs)'
         self.logger.info(logmsg)

-        current_index: Dict[str, str] = {}
+        current_index: dict[str, str] = {}
         index_file = root_dir / 'index'
         if index_file.exists():
             try:
@@ -91,11 +93,11 @@ class Archiver(AbstractManager):
                 # NOTE: should we remove if it has subs?
                 index_file.unlink()

-        sub_indexes: List[Path] = []
-        current_index_dirs: Set[str] = set(current_index.values())
-        new_captures: Set[Path] = set()
+        sub_indexes: list[Path] = []
+        current_index_dirs: set[str] = set(current_index.values())
+        new_captures: set[Path] = set()
         # Directories that are actually in the listing.
-        current_dirs: Set[str] = set()
+        current_dirs: set[str] = set()

         if s3fs_parent_dir:
             s3fs_dir = '/'.join([s3fs_parent_dir, root_dir.name])
@@ -212,7 +214,7 @@ class Archiver(AbstractManager):
         return index_file

-    def _update_all_capture_indexes(self, *, recent_only: bool=False):
+    def _update_all_capture_indexes(self, *, recent_only: bool=False) -> None:
         '''Run that after the captures are in the proper directories'''
         # Recent captures
         self.logger.info('Update recent indexes')
@@ -278,7 +280,7 @@ class Archiver(AbstractManager):
         return dest_dir / capture_path.name

-    def _archive(self):
+    def _archive(self) -> bool:
         archive_interval = timedelta(days=get_config('generic', 'archive'))
         cut_time = (datetime.now() - archive_interval)
         self.logger.info(f'Archiving all captures older than {cut_time.isoformat()}.')
@@ -340,7 +342,7 @@ class Archiver(AbstractManager):
         self.logger.info('Archiving done.')
         return archiving_done

-    def __load_index(self, index_path: Path, ignore_sub: bool=False) -> Dict[str, str]:
+    def __load_index(self, index_path: Path, ignore_sub: bool=False) -> dict[str, str]:
         '''Loads the given index file and all the subsequent ones if they exist'''
         # NOTE: this method is used on recent and archived captures, it must never trigger a dir listing
         indexed_captures = {}
@@ -359,7 +361,7 @@ class Archiver(AbstractManager):
                 indexed_captures[key] = str(index_path.parent / path_name)
         return indexed_captures

-    def _load_indexes(self):
+    def _load_indexes(self) -> None:
         # capture_dir / Year / Month / index <- should always exists. If not, created by _update_index
         # Initialize recent index
         for index in sorted(get_captures_dir().glob('*/*/index'), reverse=True):
@@ -391,7 +393,7 @@ class Archiver(AbstractManager):
         self.logger.info(f'Archived indexes loaded: {total_archived_captures} entries.')

-def main():
+def main() -> None:
     a = Archiver()
     a.run(sleep_in_sec=3600)

View File

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import asyncio
 import json
 import logging
@@ -10,7 +12,7 @@ from pathlib import Path
-from typing import Optional, Set, Union

 from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
-from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
+from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy # type: ignore[attr-defined]

 from lookyloo.lookyloo import Lookyloo, CaptureSettings
 from lookyloo.default import AbstractManager, get_config
@@ -23,7 +25,7 @@ logging.config.dictConfig(get_config('logging'))
 class AsyncCapture(AbstractManager):

-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'async_capture'
         self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
@@ -31,7 +33,7 @@ class AsyncCapture(AbstractManager):
         self.lookyloo = Lookyloo()

         if isinstance(self.lookyloo.lacus, LacusCore):
-            self.captures: Set[asyncio.Task] = set()
+            self.captures: set[asyncio.Task] = set() # type: ignore[type-arg]

         self.fox = FOX(config_name='FOX')
         if not self.fox.available:
@@ -41,23 +43,24 @@ class AsyncCapture(AbstractManager):
         if self.fox.available:
             self.fox.capture_default_trigger(url, auto_trigger=True)

-    async def _trigger_captures(self):
+    async def _trigger_captures(self) -> None:
+        # Only called if LacusCore is used
         max_new_captures = get_config('generic', 'async_capture_processes') - len(self.captures)
         self.logger.debug(f'{len(self.captures)} ongoing captures.')
         if max_new_captures <= 0:
             self.logger.info(f'Max amount of captures in parallel reached ({len(self.captures)})')
-            return
-        for capture_task in self.lookyloo.lacus.consume_queue(max_new_captures):
+            return None
+        for capture_task in self.lookyloo.lacus.consume_queue(max_new_captures): # type: ignore[union-attr]
             self.captures.add(capture_task)
             capture_task.add_done_callback(self.captures.discard)

-    def uuids_ready(self):
+    def uuids_ready(self) -> list[str]:
         return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf')
                 if uuid and self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.DONE, CaptureStatusCore]]

     def process_capture_queue(self) -> None:
         '''Process a query from the capture queue'''
-        entries: Union[CaptureResponseCore, CaptureResponsePy]
+        entries: CaptureResponseCore | CaptureResponsePy
         for uuid in self.uuids_ready():
             if isinstance(self.lookyloo.lacus, LacusCore):
                 entries = self.lookyloo.lacus.get_capture(uuid, decode=True)
@@ -71,9 +74,9 @@ class AsyncCapture(AbstractManager):
             self.logger.info(log)
             self.lookyloo.redis.sadd('ongoing', uuid)

-            queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
+            queue: str | None = self.lookyloo.redis.getdel(f'{uuid}_mgmt')

-            to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)
+            to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid) # type: ignore[assignment]

             if get_config('generic', 'default_public'):
                 # By default, the captures are on the index, unless the user mark them as un-listed
@@ -123,9 +126,9 @@ class AsyncCapture(AbstractManager):
             self.unset_running()
             self.logger.info(f'Done with {uuid}')

-    async def _to_run_forever_async(self):
+    async def _to_run_forever_async(self) -> None:
         if self.force_stop:
-            return
+            return None

         if isinstance(self.lookyloo.lacus, LacusCore):
             await self._trigger_captures()
@@ -135,7 +138,7 @@ class AsyncCapture(AbstractManager):
             self.process_capture_queue()

-    async def _wait_to_finish_async(self):
+    async def _wait_to_finish_async(self) -> None:
         if isinstance(self.lookyloo.lacus, LacusCore):
             while self.captures:
                 self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
@@ -147,7 +150,7 @@ class AsyncCapture(AbstractManager):
             self.logger.info('No more captures')

-def main():
+def main() -> None:
     m = AsyncCapture()
     loop = asyncio.new_event_loop()
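
The _trigger_captures hunk uses the standard asyncio bookkeeping idiom: the event loop holds only weak references to tasks, so the manager keeps every task in an instance-level set and lets each task remove itself on completion via add_done_callback. A self-contained sketch of that pattern, with a hypothetical worker rather than Lookyloo's capture code:

    from __future__ import annotations

    import asyncio


    class Manager:

        def __init__(self) -> None:
            # Strong references: without this set, a pending task could be
            # garbage-collected before it finishes.
            self.captures: set[asyncio.Task] = set()

        async def _work(self, uuid: str) -> None:
            await asyncio.sleep(0.1)  # stand-in for an actual capture

        async def trigger(self, uuids: list[str]) -> None:
            for uuid in uuids:
                task = asyncio.create_task(self._work(uuid))
                self.captures.add(task)
                # Same discard-on-done callback as in _trigger_captures above.
                task.add_done_callback(self.captures.discard)
            while self.captures:  # crude equivalent of _wait_to_finish_async
                await asyncio.sleep(0.1)


    asyncio.run(Manager().trigger(['a', 'b']))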

View File

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import logging
 import logging.config
 import os
@@ -20,7 +22,7 @@ logging.config.dictConfig(get_config('logging'))
 class BackgroundIndexer(AbstractManager):

-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
         super().__init__(loglevel)
         self.lookyloo = Lookyloo()
         self.script_name = 'background_indexer'
@@ -28,7 +30,7 @@ class BackgroundIndexer(AbstractManager):
         self.discarded_captures_dir = self.lookyloo.capture_dir.parent / 'discarded_captures'
         self.discarded_captures_dir.mkdir(parents=True, exist_ok=True)

-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
         all_done = self._build_missing_pickles()
         if all_done:
             self._check_indexes()
@@ -72,7 +74,7 @@ class BackgroundIndexer(AbstractManager):
                     # The capture with this UUID exists, but it is for some reason missing in lookup_dirs
                     self.lookyloo.redis.hset('lookup_dirs', uuid, str(path))
                 else:
-                    cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid))
+                    cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid)) # type: ignore[arg-type]
                     if cached_path != path:
                         # we have a duplicate UUID, it is proably related to some bad copy/paste
                         if cached_path.exists():
@@ -118,13 +120,13 @@ class BackgroundIndexer(AbstractManager):
             return True
         return False

-    def _check_indexes(self):
+    def _check_indexes(self) -> None:
         index_redis = self.lookyloo.indexing.redis
         can_index = index_redis.set('ongoing_indexing', 1, ex=3600, nx=True)
         if not can_index:
             # There is no reason to run this method in multiple scripts.
             self.logger.info('Indexing already ongoing in another process.')
-            return
+            return None
         self.logger.info('Check indexes...')
         for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False):
             if self.lookyloo.is_public_instance and cache.no_index:
@@ -163,7 +165,7 @@ class BackgroundIndexer(AbstractManager):
         self.logger.info('... done.')

-def main():
+def main() -> None:
     i = BackgroundIndexer()
     i.run(sleep_in_sec=60)

View File

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import json
 import time
 import logging
@@ -8,7 +10,7 @@ from collections import Counter
 from datetime import date, timedelta
-from typing import Any, Dict, Optional

-from lookyloo.lookyloo import Lookyloo, CaptureStatusCore, CaptureStatusPy
+from lookyloo.lookyloo import Lookyloo, CaptureStatusCore, CaptureStatusPy # type: ignore[attr-defined]
 from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
 from lookyloo.helpers import ParsedUserAgent, serialize_to_json
@@ -17,19 +19,19 @@ logging.config.dictConfig(get_config('logging'))
 class Processing(AbstractManager):

-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
         super().__init__(loglevel)
         self.script_name = 'processing'

         self.lookyloo = Lookyloo()

         self.use_own_ua = get_config('generic', 'use_user_agents_users')

-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
         if self.use_own_ua:
             self._build_ua_file()
         self._retry_failed_enqueue()

-    def _build_ua_file(self):
+    def _build_ua_file(self) -> None:
         '''Build a file in a format compatible with the capture page'''
         yesterday = (date.today() - timedelta(days=1))
         self_generated_ua_file_path = get_homedir() / 'own_user_agents' / str(yesterday.year) / f'{yesterday.month:02}'
@@ -44,7 +46,7 @@ class Processing(AbstractManager):
             self.logger.info(f'No User-agent file for {yesterday} to generate.')
             return

-        to_store: Dict[str, Any] = {'by_frequency': []}
+        to_store: dict[str, Any] = {'by_frequency': []}
         uas = Counter([entry.split('|', 1)[1] for entry in entries])
         for ua, _ in uas.most_common():
             parsed_ua = ParsedUserAgent(ua)
@@ -71,7 +73,7 @@ class Processing(AbstractManager):
         self.lookyloo.redis.delete(f'user_agents|{yesterday.isoformat()}')
         self.logger.info(f'User-agent file for {yesterday} generated.')

-    def _retry_failed_enqueue(self):
+    def _retry_failed_enqueue(self) -> None:
         '''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
         for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
             try_reenqueue = False
@@ -131,7 +133,7 @@ class Processing(AbstractManager):
             self.logger.info(f'{uuid} enqueued.')

-def main():
+def main() -> None:
     p = Processing()
     p.run(sleep_in_sec=30)

View File

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import argparse
 import os
 import time
@@ -24,14 +26,14 @@ def check_running(name: str) -> bool:
         return False

-def launch_cache(storage_directory: Optional[Path]=None):
+def launch_cache(storage_directory: Path | None=None) -> None:
     if not storage_directory:
         storage_directory = get_homedir()
     if not check_running('cache'):
         Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache'))

-def shutdown_cache(storage_directory: Optional[Path]=None):
+def shutdown_cache(storage_directory: Path | None=None) -> None:
     if not storage_directory:
         storage_directory = get_homedir()
     r = Redis(unix_socket_path=get_socket_path('cache'))
@@ -39,14 +41,14 @@ def shutdown_cache(storage_directory: Optional[Path]=None):
     print('Redis cache database shutdown.')

-def launch_indexing(storage_directory: Optional[Path]=None):
+def launch_indexing(storage_directory: Path | None=None) -> None:
     if not storage_directory:
         storage_directory = get_homedir()
     if not check_running('indexing'):
         Popen(["./run_redis.sh"], cwd=(storage_directory / 'indexing'))

-def shutdown_indexing(storage_directory: Optional[Path]=None):
+def shutdown_indexing(storage_directory: Path | None=None) -> None:
     if not storage_directory:
         storage_directory = get_homedir()
     r = Redis(unix_socket_path=get_socket_path('indexing'))
@@ -54,13 +56,13 @@ def shutdown_indexing(storage_directory: Optional[Path]=None):
     print('Redis indexing database shutdown.')

-def launch_all():
+def launch_all() -> None:
     launch_cache()
     launch_indexing()

-def check_all(stop: bool=False):
-    backends: Dict[str, bool] = {'cache': False, 'indexing': False}
+def check_all(stop: bool=False) -> None:
+    backends: dict[str, bool] = {'cache': False, 'indexing': False}
     while True:
         for db_name in backends.keys():
             try:
@@ -81,12 +83,12 @@ def check_all(stop: bool=False):
         time.sleep(1)

-def stop_all():
+def stop_all() -> None:
     shutdown_cache()
     shutdown_indexing()

-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(description='Manage backend DBs.')
     parser.add_argument("--start", action='store_true', default=False, help="Start all")
     parser.add_argument("--stop", action='store_true', default=False, help="Stop all")
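
check_all boils down to pinging each Redis backend over its unix socket until it reaches the desired state. A minimal sketch of that polling loop, with an assumed socket path and retry budget that are not part of this diff:

    import time

    from redis import Redis
    from redis.exceptions import ConnectionError as RedisConnectionError


    def wait_for_backend(socket_path: str, stop: bool=False, retries: int=30) -> bool:
        '''Poll a Redis unix socket until it answers (startup) or stops answering (shutdown).'''
        for _ in range(retries):
            try:
                up = Redis(unix_socket_path=socket_path).ping()
            except RedisConnectionError:
                up = False
            if up != stop:
                return True  # reached the state we were waiting for
            time.sleep(1)
        return False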

View File

@@ -5,7 +5,7 @@ import time

 from lookyloo.default import AbstractManager

-def main():
+def main() -> None:
     AbstractManager.force_shutdown()
     time.sleep(5)
     while True:

View File

@@ -5,7 +5,7 @@ from subprocess import Popen, run

 from lookyloo.default import get_homedir

-def main():
+def main() -> None:
     # Just fail if the env isn't set.
     get_homedir()
     print('Start backend (redis)...')

View File

@@ -13,13 +13,13 @@ logging.config.dictConfig(get_config('logging'))
 class Website(AbstractManager):

-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: Optional[int]=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'website'
-        self.process = self._launch_website()
+        self.process: Popen = self._launch_website() # type: ignore[type-arg]
         self.set_running()

-    def _launch_website(self):
+    def _launch_website(self) -> Popen: # type: ignore[type-arg]
         website_dir = get_homedir() / 'website'
         ip = get_config('generic', 'website_listen_ip')
         port = get_config('generic', 'website_listen_port')
@@ -32,7 +32,7 @@ class Website(AbstractManager):
                      cwd=website_dir)

-def main():
+def main() -> None:
     w = Website()
     w.run(sleep_in_sec=10)

View File

@@ -8,7 +8,7 @@ from redis.exceptions import ConnectionError

 from lookyloo.default import get_homedir, get_socket_path

-def main():
+def main() -> None:
     get_homedir()
     p = Popen(['shutdown'])
     p.wait()

View File

@@ -15,14 +15,14 @@ from lookyloo.default import get_homedir, get_config
 logging.config.dictConfig(get_config('logging'))

-def compute_hash_self():
+def compute_hash_self() -> bytes:
     m = hashlib.sha256()
     with (get_homedir() / 'bin' / 'update.py').open('rb') as f:
         m.update(f.read())
         return m.digest()

-def keep_going(ignore=False):
+def keep_going(ignore: bool=False) -> None:
     if ignore:
         return
     keep_going = input('Continue? (y/N) ')
@@ -31,7 +31,7 @@ def keep_going(ignore=False):
         sys.exit()

-def run_command(command, expect_fail: bool=False, capture_output: bool=True):
+def run_command(command: str, expect_fail: bool=False, capture_output: bool=True) -> None:
     args = shlex.split(command)
     homedir = get_homedir()
     process = subprocess.run(args, cwd=homedir, capture_output=capture_output)
@@ -42,7 +42,7 @@ def run_command(command, expect_fail: bool=False, capture_output: bool=True):
         sys.exit()

-def check_poetry_version():
+def check_poetry_version() -> None:
     args = shlex.split("poetry self -V")
     homedir = get_homedir()
     process = subprocess.run(args, cwd=homedir, capture_output=True)
@@ -58,7 +58,7 @@ def check_poetry_version():
         sys.exit()

-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.')
     parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.')
     args = parser.parse_args()

View File

@@ -1,3 +1,8 @@
 import logging

+from .lookyloo import Lookyloo # noqa
+from .indexing import Indexing # noqa
+
 logging.getLogger(__name__).addHandler(logging.NullHandler())
+
+__all__ = ['Lookyloo', 'Indexing']
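
The two re-imports (kept quiet with # noqa) plus the explicit __all__ turn the package root into the supported entry point and pin down what a wildcard import exposes. Usage sketch:

    # Thanks to the re-exports above, callers import from the package root:
    from lookyloo import Lookyloo, Indexing

    # and __all__ limits a wildcard import to exactly those two names:
    # from lookyloo import *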

View File

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import contextlib
 import gzip
 import json
@@ -13,15 +15,15 @@ import time
 from collections.abc import Mapping
 from datetime import datetime
-from functools import lru_cache
+from functools import lru_cache, _CacheInfo as CacheInfo
 from logging import Logger, LoggerAdapter
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, Set, MutableMapping
+from typing import Any, Dict, List, Optional, Tuple, Union, Set, MutableMapping, Iterator

 import dns.rdatatype
 import dns.resolver

-from har2tree import CrawledTree, Har2TreeError, HarFile
-from pyipasnhistory import IPASNHistory
+from har2tree import CrawledTree, Har2TreeError, HarFile # type: ignore[attr-defined]
+from pyipasnhistory import IPASNHistory # type: ignore[attr-defined]
 from redis import Redis

 from .context import Context
@@ -32,11 +34,11 @@ from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, Tr
 from .modules import Cloudflare

-class LookylooCacheLogAdapter(LoggerAdapter):
+class LookylooCacheLogAdapter(LoggerAdapter): # type: ignore[type-arg]
     """
     Prepend log entry with the UUID of the capture
     """
-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> Tuple[str, MutableMapping[str, Any]]:
+    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, MutableMapping[str, Any]]:
         if self.extra:
             return '[{}] {}'.format(self.extra['uuid'], msg), kwargs
         return msg, kwargs
@@ -47,10 +49,10 @@ class CaptureCache():
               'error', 'no_index', 'categories', 'parent',
               'user_agent', 'referer', 'logger')

-    def __init__(self, cache_entry: Dict[str, Any]):
+    def __init__(self, cache_entry: dict[str, Any]):
         logger = logging.getLogger(f'{self.__class__.__name__}')
         logger.setLevel(get_config('generic', 'loglevel'))
-        __default_cache_keys: Tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
+        __default_cache_keys: tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
                                                                      'url', 'redirects', 'capture_dir')
         if 'uuid' not in cache_entry or 'capture_dir' not in cache_entry:
             raise LookylooException(f'The capture is deeply broken: {cache_entry}')
@@ -80,16 +82,16 @@ class CaptureCache():
             # If the microsecond is missing (0), it fails
             self.timestamp = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S%z')

-        self.redirects: List[str] = json.loads(cache_entry['redirects']) if cache_entry.get('redirects') else []
+        self.redirects: list[str] = json.loads(cache_entry['redirects']) if cache_entry.get('redirects') else []

         # Error without all the keys in __default_cache_keys was fatal.
         # if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
-        self.error: Optional[str] = cache_entry.get('error')
+        self.error: str | None = cache_entry.get('error')
         self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
-        self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
-        self.parent: Optional[str] = cache_entry.get('parent')
-        self.user_agent: Optional[str] = cache_entry.get('user_agent')
-        self.referer: Optional[str] = cache_entry.get('referer')
+        self.categories: list[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
+        self.parent: str | None = cache_entry.get('parent')
+        self.user_agent: str | None = cache_entry.get('user_agent')
+        self.referer: str | None = cache_entry.get('referer')

     @property
     def tree(self) -> CrawledTree:
@@ -142,26 +144,26 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> C
     raise NoValidHarFile("Couldn't find HAR files")

-def serialize_sets(obj):
+def serialize_sets(obj: Any) -> Any:
     if isinstance(obj, set):
         return list(obj)
     return obj

-class CapturesIndex(Mapping):
+class CapturesIndex(Mapping): # type: ignore[type-arg]

-    def __init__(self, redis: Redis, contextualizer: Optional[Context]=None):
+    def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None: # type: ignore[type-arg]
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(get_config('generic', 'loglevel'))
         self.redis = redis
         self.indexing = Indexing()
         self.contextualizer = contextualizer
-        self.__cache: Dict[str, CaptureCache] = {}
+        self.__cache: dict[str, CaptureCache] = {}
         self._quick_init()
         self.timeout = get_config('generic', 'max_tree_create_time')
         try:
-            self.ipasnhistory: Optional[IPASNHistory] = IPASNHistory()
+            self.ipasnhistory: IPASNHistory | None = IPASNHistory()
             if not self.ipasnhistory.is_up:
                 self.ipasnhistory = None
         except Exception as e:
@@ -169,7 +171,7 @@ class CapturesIndex(Mapping):
             self.logger.warning(f'Unable to setup IPASN History: {e}')
             self.ipasnhistory = None
         try:
-            self.cloudflare: Optional[Cloudflare] = Cloudflare()
+            self.cloudflare: Cloudflare | None = Cloudflare()
             if not self.cloudflare.available:
                 self.cloudflare = None
         except Exception as e:
@@ -177,7 +179,7 @@ class CapturesIndex(Mapping):
             self.cloudflare = None

     @property
-    def cached_captures(self) -> Set[str]:
+    def cached_captures(self) -> set[str]:
         self._quick_init()
         return set(self.__cache.keys())
@@ -199,10 +201,10 @@ class CapturesIndex(Mapping):
             self.__cache[uuid] = self._set_capture_cache(capture_dir)
         return self.__cache[uuid]

-    def __iter__(self):
-        return iter(self.__cache)
+    def __iter__(self) -> Iterator[dict[str, CaptureCache]]:
+        return iter(self.__cache) # type: ignore[arg-type]

-    def __len__(self):
+    def __len__(self) -> int:
         return len(self.__cache)

     def reload_cache(self, uuid: str) -> None:
@@ -221,7 +223,7 @@ class CapturesIndex(Mapping):
         self.redis.flushdb()
         self.__cache = {}

-    def lru_cache_status(self):
+    def lru_cache_status(self) -> CacheInfo:
         return load_pickle_tree.cache_info()

     def _quick_init(self) -> None:
@@ -332,11 +334,11 @@ class CapturesIndex(Mapping):
         return tree

     @staticmethod
-    def _raise_timeout(_, __):
+    def _raise_timeout(_, __) -> None: # type: ignore[no-untyped-def]
         raise TimeoutError

     @contextlib.contextmanager
-    def _timeout_context(self):
+    def _timeout_context(self) -> Iterator[None]:
         if self.timeout != 0:
             # Register a function to raise a TimeoutError on the signal.
             signal.signal(signal.SIGALRM, self._raise_timeout)
@@ -378,7 +380,7 @@ class CapturesIndex(Mapping):
             logger.warning(f'Unable to rebuild the tree for {capture_dir}, the HAR files are broken.')
             tree = None

-        cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
+        cache: dict[str, str | int] = {'uuid': uuid, 'capture_dir': capture_dir_str}
         if capture_settings.get('url'):
             cache['url'] = capture_settings['url']
@@ -450,18 +452,18 @@ class CapturesIndex(Mapping):
             p.execute()
         return CaptureCache(cache)

-    def __resolve_dns(self, ct: CrawledTree, logger: LookylooCacheLogAdapter):
+    def __resolve_dns(self, ct: CrawledTree, logger: LookylooCacheLogAdapter) -> CrawledTree:
         '''Resolves all domains of the tree, keeps A (IPv4), AAAA (IPv6), and CNAME entries
         and store them in ips.json and cnames.json, in the capture directory.
         Updates the nodes of the tree accordingly so the information is available.
         '''

-        def _build_cname_chain(known_cnames: Dict[str, str], hostname) -> List[str]:
+        def _build_cname_chain(known_cnames: dict[str, str], hostname: str) -> list[str]:
             '''Returns a list of CNAMEs starting from one hostname.
             The CNAMEs resolutions are made in `_resolve_dns`. A hostname can have a CNAME entry
             and the CNAME entry can have an other CNAME entry, and so on multiple times.
             This method loops over the hostnames until there are no CNAMES.'''
-            cnames: List[str] = []
+            cnames: list[str] = []
             to_search = hostname
             while True:
                 if not known_cnames.get(to_search):
@@ -474,7 +476,7 @@ class CapturesIndex(Mapping):
         ips_path = ct.root_hartree.har.path.parent / 'ips.json'
         ipasn_path = ct.root_hartree.har.path.parent / 'ipasn.json'

-        host_cnames: Dict[str, str] = {}
+        host_cnames: dict[str, str] = {}
         if cnames_path.exists():
             try:
                 with cnames_path.open() as f:
@@ -483,7 +485,7 @@ class CapturesIndex(Mapping):
                 # The json is broken, delete and re-trigger the requests
                 host_cnames = {}

-        host_ips: Dict[str, Dict[str, Set[str]]] = {}
+        host_ips: dict[str, dict[str, set[str]]] = {}
         if ips_path.exists():
             try:
                 with ips_path.open() as f:
@@ -492,7 +494,7 @@ class CapturesIndex(Mapping):
                 # The json is broken, delete and re-trigger the requests
                 host_ips = {}

-        ipasn: Dict[str, Dict[str, str]] = {}
+        ipasn: dict[str, dict[str, str]] = {}
         if ipasn_path.exists():
             try:
                 with ipasn_path.open() as f:
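
One notable import in this file: functools has no public name for the named tuple returned by cache_info(), so the diff aliases the private _CacheInfo to annotate lru_cache_status. A sketch of what that return type looks like, on a toy function rather than load_pickle_tree:

    from functools import lru_cache, _CacheInfo as CacheInfo  # private name, as in the diff


    @lru_cache(maxsize=128)
    def load(path: str) -> str:
        return path.upper()  # stand-in for an expensive pickle load


    def lru_cache_status() -> CacheInfo:
        # CacheInfo is a named tuple: (hits, misses, maxsize, currsize)
        return load.cache_info()


    load('a')
    load('a')
    print(lru_cache_status())  # CacheInfo(hits=1, misses=1, maxsize=128, currsize=1)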

View File

@@ -1,11 +1,13 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import fnmatch
 import logging

-from typing import Dict, Any, Union, List, Optional, TypedDict, Tuple

-from har2tree import URLNode
+from har2tree import URLNode # type: ignore[attr-defined]
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
@@ -19,8 +21,8 @@ from .exceptions import MissingUUID, TreeNeedsRebuild
 class CompareSettings(TypedDict):
     '''The settings that can be passed to the compare method to filter out some differences'''

-    ressources_ignore_domains: Tuple[str, ...]
-    ressources_ignore_regexes: Tuple[str, ...]
+    ressources_ignore_domains: tuple[str, ...]
+    ressources_ignore_regexes: tuple[str, ...]

     ignore_ips: bool
@@ -39,16 +41,16 @@ class Comparator():
         self.public_domain = get_config('generic', 'public_domain')

     @property
-    def redis(self) -> Redis:
+    def redis(self) -> Redis: # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)

-    def get_comparables_node(self, node: URLNode) -> Dict[str, str]:
+    def get_comparables_node(self, node: URLNode) -> dict[str, str]:
         to_return = {'url': node.name, 'hostname': node.hostname}
         if hasattr(node, 'ip_address'):
             to_return['ip_address'] = str(node.ip_address)
         return to_return

-    def _compare_nodes(self, left: Dict[str, str], right: Dict[str, str], /, different: bool, ignore_ips: bool) -> Tuple[bool, Dict[str, Any]]:
+    def _compare_nodes(self, left: dict[str, str], right: dict[str, str], /, different: bool, ignore_ips: bool) -> tuple[bool, dict[str, Any]]:
         to_return = {}
         # URL
         if left['url'] != right['url']:
@@ -78,12 +80,12 @@ class Comparator():
         # IPs in hostnode + ASNs
         return different, to_return

-    def get_comparables_capture(self, capture_uuid: str) -> Dict[str, Any]:
+    def get_comparables_capture(self, capture_uuid: str) -> dict[str, Any]:
         if capture_uuid not in self._captures_index:
             raise MissingUUID(f'{capture_uuid} does not exists.')

         capture = self._captures_index[capture_uuid]
-        to_return: Dict[str, Any]
+        to_return: dict[str, Any]
         try:
             if capture.error:
                 # The error on lookyloo is too verbose and contains the UUID of the capture, skip that.
@@ -108,17 +110,17 @@ class Comparator():
             to_return = {'error': str(e)}
         return to_return

-    def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Tuple[bool, Dict[str, Any]]:
+    def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: CompareSettings | None=None) -> tuple[bool, dict[str, Any]]:
         if capture_left not in self._captures_index:
             raise MissingUUID(f'{capture_left} does not exists.')
         if capture_right not in self._captures_index:
             raise MissingUUID(f'{capture_right} does not exists.')

         different: bool = False
-        to_return: Dict[str, Dict[str, Union[str,
-                                             List[Union[str, Dict[str, Any]]],
-                                             Dict[str, Union[int, str,
-                                                             List[Union[int, str, Dict[str, Any]]]]]]]] = {}
+        to_return: dict[str, dict[str, (str |
+                                        list[str | dict[str, Any]] |
+                                        dict[str, (int | str |
+                                                   list[int | str | dict[str, Any]])])]] = {}
         to_return['lookyloo_urls'] = {'left': f'https://{self.public_domain}/tree/{capture_left}',
                                       'right': f'https://{self.public_domain}/tree/{capture_right}'}
         left = self.get_comparables_capture(capture_left)
@@ -192,7 +194,7 @@ class Comparator():
                                   'details': left['redirects']['length']}

         # Prepare settings
-        _settings: Optional[CompareSettings]
+        _settings: CompareSettings | None
         if settings:
             # cleanup the settings
             _ignore_domains = set(settings['ressources_ignore_domains'] if settings.get('ressources_ignore_domains') else [])

View File

@@ -1,12 +1,14 @@
 #!/usr/bin/env python3
+from __future__ import annotations
+
 import json
 import logging

 from pathlib import Path
-from typing import Any, Dict, List, Optional, Set, Union
 from urllib.parse import urlsplit

-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode # type: ignore[attr-defined]
 from redis import Redis

 from .default import get_config, get_homedir, get_socket_path
@@ -16,14 +18,14 @@ from .modules import SaneJavaScript

 class Context():

-    def __init__(self):
+    def __init__(self) -> None:
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(get_config('generic', 'loglevel'))
-        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)
+        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True) # type: ignore[type-arg]
         self._cache_known_content()
         self.sanejs = SaneJavaScript(config_name='SaneJS')

-    def clear_context(self):
+    def clear_context(self) -> None:
         self.redis.flushdb()

     def _cache_known_content(self) -> None:
@@ -55,13 +57,13 @@ class Context():
                     p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
         p.execute()

-    def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode, str]) -> Dict[str, Any]:
+    def find_known_content(self, har2tree_container: CrawledTree | HostNode | URLNode | str) -> dict[str, Any]:
         """Return a dictionary of content resources found in the local known_content database, or in SaneJS (if enabled)"""
         if isinstance(har2tree_container, str):
-            to_lookup: Set[str] = {har2tree_container, }
+            to_lookup: set[str] = {har2tree_container, }
         else:
             to_lookup = get_resources_hashes(har2tree_container)
-        known_content_table: Dict[str, Any] = {}
+        known_content_table: dict[str, Any] = {}
         if not to_lookup:
             return known_content_table
         # get generic known content
@@ -113,7 +115,7 @@ class Context():

         return known_content_table

-    def store_known_legitimate_tree(self, tree: CrawledTree):
+    def store_known_legitimate_tree(self, tree: CrawledTree) -> None:
         known_content = self.find_known_content(tree)
         capture_file: Path = get_homedir() / 'known_content_user' / f'{urlsplit(tree.root_url).hostname}.json'
         if capture_file.exists():
@@ -156,7 +158,7 @@ class Context():
         with open(capture_file, 'w') as f:
             json.dump(to_store, f, indent=2, default=serialize_to_json)

-    def mark_as_legitimate(self, tree: CrawledTree, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> None:
+    def mark_as_legitimate(self, tree: CrawledTree, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> None:
         if hostnode_uuid:
             urlnodes = tree.root_hartree.get_host_node_by_uuid(hostnode_uuid).urls
         elif urlnode_uuid:
@@ -214,7 +216,7 @@ class Context():
     def legitimate_body(self, body_hash: str, legitimate_hostname: str) -> None:
         self.redis.sadd(f'bh|{body_hash}|legitimate', legitimate_hostname)

-    def store_known_malicious_ressource(self, ressource_hash: str, details: Dict[str, str]):
+    def store_known_malicious_ressource(self, ressource_hash: str, details: dict[str, str]) -> None:
         known_malicious_ressource_file = get_homedir() / 'known_content_user' / 'malicious.json'
         if known_malicious_ressource_file.exists():
             with open(known_malicious_ressource_file) as f:
@@ -236,7 +238,7 @@ class Context():
         with open(known_malicious_ressource_file, 'w') as f:
             json.dump(to_store, f, indent=2, default=serialize_to_json)

-    def add_malicious(self, ressource_hash: str, details: Dict[str, str]):
+    def add_malicious(self, ressource_hash: str, details: dict[str, str]) -> None:
         self.store_known_malicious_ressource(ressource_hash, details)
         p = self.redis.pipeline()
         p.sadd('bh|malicious', ressource_hash)
@@ -246,7 +248,7 @@ class Context():
             p.sadd(f'{ressource_hash}|tag', details['type'])
         p.execute()

-    def store_known_legitimate_ressource(self, ressource_hash: str, details: Dict[str, str]):
+    def store_known_legitimate_ressource(self, ressource_hash: str, details: dict[str, str]) -> None:
         known_legitimate_ressource_file = get_homedir() / 'known_content_user' / 'legitimate.json'
         if known_legitimate_ressource_file.exists():
             with open(known_legitimate_ressource_file) as f:
@@ -267,7 +269,7 @@ class Context():
         with open(known_legitimate_ressource_file, 'w') as f:
             json.dump(to_store, f, indent=2, default=serialize_to_json)

-    def add_legitimate(self, ressource_hash: str, details: Dict[str, str]):
+    def add_legitimate(self, ressource_hash: str, details: dict[str, str]) -> None:
         self.store_known_legitimate_ressource(ressource_hash, details)
         if 'domain' in details:
             self.redis.sadd(f'bh|{ressource_hash}|legitimate', details['domain'])
@@ -277,7 +279,7 @@ class Context():

     # Query DB

-    def is_legitimate(self, urlnode: URLNode, known_hashes: Dict[str, Any]) -> Optional[bool]:
+    def is_legitimate(self, urlnode: URLNode, known_hashes: dict[str, Any]) -> bool | None:
         """
         If legitimate if generic, marked as legitimate or known on sanejs, loaded from the right domain
         3 cases:
@@ -285,7 +287,7 @@ class Context():
         * False if *any* content is malicious
         * None in all other cases
         """
-        status: List[Optional[bool]] = []
+        status: list[bool | None] = []
         for h in urlnode.resources_hashes:
             # Note: we can have multiple hashes on the same urlnode (see embedded resources).
             if h not in known_hashes:
@@ -305,7 +307,7 @@ class Context():
             return True  # All the contents are known legitimate
         return None

-    def is_malicious(self, urlnode: URLNode, known_hashes: Dict[str, Any]) -> Optional[bool]:
+    def is_malicious(self, urlnode: URLNode, known_hashes: dict[str, Any]) -> bool | None:
         """3 cases:
         * True if *any* content is malicious
         * False if *all* the contents are known legitimate
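
is_legitimate and is_malicious make their tri-state contract explicit: bool | None means a definite verdict or "no data", so callers have to test identity instead of truthiness. A sketch of the consuming side, with a hypothetical caller:

    from __future__ import annotations


    def render_verdict(legitimate: bool | None) -> str:
        # `if legitimate:` would conflate False (malicious) with None (unknown),
        # hence the identity checks.
        if legitimate is True:
            return 'legitimate'
        if legitimate is False:
            return 'malicious'
        return 'unknown'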

View File

@@ -16,3 +16,17 @@ from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noq
 from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa

 os.chdir(get_homedir())
+
+__all__ = [
+    'LookylooException',
+    'AbstractManager',
+    'MissingEnv',
+    'CreateDirectoryException',
+    'ConfigError',
+    'get_homedir',
+    'load_configs',
+    'get_config',
+    'safe_create_dir',
+    'get_socket_path',
+    'try_make_file',
+]

View File

@ -1,14 +1,16 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import asyncio import asyncio
import logging import logging
import logging.config
import os import os
import signal import signal
import time import time
from abc import ABC from abc import ABC
from datetime import datetime, timedelta from datetime import datetime, timedelta
from subprocess import Popen from subprocess import Popen
from typing import List, Optional, Tuple
from redis import Redis from redis import Redis
from redis.exceptions import ConnectionError as RedisConnectionError from redis.exceptions import ConnectionError as RedisConnectionError
@@ -20,18 +22,18 @@ class AbstractManager(ABC):

     script_name: str

-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
         self.loglevel: int = loglevel if loglevel is not None else get_config('generic', 'loglevel') or logging.INFO
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(self.loglevel)
         self.logger.info(f'Initializing {self.__class__.__name__}')
-        self.process: Optional[Popen] = None
+        self.process: Popen | None = None  # type: ignore[type-arg]
         self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
         self.force_stop = False

     @staticmethod
-    def is_running() -> List[Tuple[str, float]]:
+    def is_running() -> list[tuple[str, float]]:
         try:
             r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
             for script_name, score in r.zrangebyscore('running', '-inf', '+inf', withscores=True):
@@ -52,7 +54,7 @@ class AbstractManager(ABC):
             return []

     @staticmethod
-    def clear_running():
+    def clear_running() -> None:
         try:
             r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
             r.delete('running')
@@ -60,14 +62,14 @@ class AbstractManager(ABC):
             print('Unable to connect to redis, the system is down.')

     @staticmethod
-    def force_shutdown():
+    def force_shutdown() -> None:
         try:
             r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
             r.set('shutdown', 1)
         except RedisConnectionError:
             print('Unable to connect to redis, the system is down.')

-    def set_running(self, number: Optional[int]=None) -> None:
+    def set_running(self, number: int | None=None) -> None:
         if number == 0:
             self.__redis.zrem('running', self.script_name)
         else:
@@ -111,7 +113,7 @@ class AbstractManager(ABC):
     def _to_run_forever(self) -> None:
         raise NotImplementedError('This method must be implemented by the child')

-    def _kill_process(self):
+    def _kill_process(self) -> None:
         if self.process is None:
             return
         kill_order = [signal.SIGWINCH, signal.SIGTERM, signal.SIGINT, signal.SIGKILL]
@@ -167,7 +169,7 @@ class AbstractManager(ABC):
     def _wait_to_finish(self) -> None:
         self.logger.info('Not implemented, nothing to wait for.')

-    async def stop(self):
+    async def stop(self) -> None:
         self.force_stop = True

     async def _to_run_forever_async(self) -> None:
@@ -176,7 +178,7 @@ class AbstractManager(ABC):
     async def _wait_to_finish_async(self) -> None:
         self.logger.info('Not implemented, nothing to wait for.')

-    async def stop_async(self):
+    async def stop_async(self) -> None:
         """Method to pass the signal handler:
             loop.add_signal_handler(signal.SIGTERM, lambda: loop.create_task(p.stop()))
         """


@@ -1,4 +1,7 @@
 #!/usr/bin/env python3
+
+from __future__ import annotations
+
 import json
 import logging
 import os
@@ -9,7 +12,7 @@ from typing import Any, Dict, Optional, Union
 from . import env_global_name
 from .exceptions import ConfigError, CreateDirectoryException, MissingEnv

-configs: Dict[str, Dict[str, Any]] = {}
+configs: dict[str, dict[str, Any]] = {}
 logger = logging.getLogger('Helpers')
@@ -34,7 +37,7 @@ Run the following command (assuming you run the code from the clonned repository

 @lru_cache(64)
-def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
+def load_configs(path_to_config_files: str | Path | None=None) -> None:
     global configs
     if configs:
         return
@@ -57,7 +60,7 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):

 @lru_cache(64)
-def get_config(config_type: str, entry: Optional[str]=None, quiet: bool=False) -> Any:
+def get_config(config_type: str, entry: str | None=None, quiet: bool=False) -> Any:
     """Get an entry from the given config_type file. Automatic fallback to the sample file"""
     global configs
     if not configs:
@@ -97,7 +100,7 @@ def get_socket_path(name: str) -> str:
     return str(get_homedir() / mapping[name])

-def try_make_file(filename: Path):
+def try_make_file(filename: Path) -> bool:
     try:
         filename.touch(exist_ok=False)
         return True
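
Note on this hunk: `try_make_file` now advertises its `bool` contract. It works because `Path.touch(exist_ok=False)` is atomic and raises `FileExistsError` when the file already exists (the except branch sits outside this hunk; it presumably returns False), which makes the helper usable as a crude cross-process lock:

```python
from pathlib import Path

def try_make_file(filename: Path) -> bool:
    # Same shape as the helper above; the except branch is assumed.
    try:
        filename.touch(exist_ok=False)
        return True
    except FileExistsError:
        return False

lock = Path('/tmp/lookyloo_demo.lock')  # hypothetical path
if try_make_file(lock):
    try:
        ...  # exclusive work
    finally:
        lock.unlink()
else:
    print('another process holds the lock')
```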


@@ -14,23 +14,22 @@ from typing import Any, Dict, List, Optional, Set, Union, Tuple
 from urllib.parse import urlparse

-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from playwrightcapture import get_devices
 from publicsuffixlist import PublicSuffixList  # type: ignore
-from pytaxonomies import Taxonomies
+from pytaxonomies import Taxonomies  # type: ignore[attr-defined]
 from ua_parser import user_agent_parser  # type: ignore
 from werkzeug.user_agent import UserAgent
 from werkzeug.utils import cached_property

-from .default import get_homedir, safe_create_dir, get_config
-from .exceptions import LookylooException
+from .default import get_homedir, safe_create_dir, get_config, LookylooException

 logger = logging.getLogger('Lookyloo - Helpers')


 # This method is used in json.dump or json.dumps calls as the default parameter:
 # json.dumps(..., default=dump_to_json)
-def serialize_to_json(obj: Union[Set]) -> Union[List]:
+def serialize_to_json(obj: Union[Set[Any]]) -> Union[List[Any]]:
     if isinstance(obj, set):
         return sorted(obj)
@@ -52,12 +51,12 @@ def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNode

 @lru_cache(64)
-def get_taxonomies():
+def get_taxonomies() -> Taxonomies:
     return Taxonomies()


 @lru_cache(64)
-def get_public_suffix_list():
+def get_public_suffix_list() -> PublicSuffixList:
     """Initialize Public Suffix List"""
     # TODO (?): fetch the list
     return PublicSuffixList()
@@ -131,7 +130,7 @@ def get_sorted_captures_from_disk(captures_dir: Path, /, *,

 class UserAgents:

-    def __init__(self):
+    def __init__(self) -> None:
         if get_config('generic', 'use_user_agents_users'):
             self.path = get_homedir() / 'own_user_agents'
         else:
@@ -145,14 +144,14 @@ class UserAgents:
         self.playwright_devices = get_devices()
         self._load_newest_ua_file(ua_files_path[0])

-    def _load_newest_ua_file(self, path: Path):
+    def _load_newest_ua_file(self, path: Path) -> None:
         self.most_recent_ua_path = path
         with self.most_recent_ua_path.open() as f:
             self.most_recent_uas = json.load(f)
         self.by_freq = self.most_recent_uas.pop('by_frequency')
         self._load_playwright_devices()

-    def _load_playwright_devices(self):
+    def _load_playwright_devices(self) -> None:
         # Only get default and desktop for now.
         for device_name, details in self.playwright_devices['desktop']['default'].items():
             parsed_ua = ParsedUserAgent(details['user_agent'])
@@ -254,16 +253,17 @@ def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes, L
     return to_return


-def uniq_domains(uniq_urls):
+def uniq_domains(uniq_urls: List[str]) -> Set[str]:
     domains = set()
     for url in uniq_urls:
         splitted = urlparse(url)
-        domains.add(splitted.hostname)
+        if splitted.hostname:
+            domains.add(splitted.hostname)
     return domains


 @lru_cache(64)
-def get_useragent_for_requests():
+def get_useragent_for_requests() -> str:
     return f'Lookyloo / {version("lookyloo")}'
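
Note on this hunk: the new `if splitted.hostname:` guard is a real bug fix, not just typing polish. `urllib.parse.urlparse()` returns `hostname is None` for any input without a netloc, so the old code could add `None` to the set, contradicting the new `Set[str]` return type. A quick demonstration:

```python
from urllib.parse import urlparse

urls = ['https://example.com/a', 'mailto:someone@example.com', 'not a url']
domains = set()
for url in urls:
    splitted = urlparse(url)
    if splitted.hostname:  # filters out the None hostnames
        domains.add(splitted.hostname)
print(domains)  # {'example.com'} -- without the guard: {'example.com', None}
```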
@@ -331,11 +331,11 @@ class ParsedUserAgent(UserAgent):

     # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/
     @cached_property
-    def _details(self):
+    def _details(self) -> Dict[str, Any]:
         return user_agent_parser.Parse(self.string)

     @property
-    def platform(self):
+    def platform(self) -> Optional[str]:  # type: ignore[override]
         return self._details['os'].get('family')

     @property
@@ -343,11 +343,11 @@ class ParsedUserAgent(UserAgent):
         return self._aggregate_version(self._details['os'])

     @property
-    def browser(self):
+    def browser(self) -> Optional[str]:  # type: ignore[override]
         return self._details['user_agent'].get('family')

     @property
-    def version(self):
+    def version(self) -> Optional[str]:  # type: ignore[override]
         return self._aggregate_version(self._details['user_agent'])

     def _aggregate_version(self, details: Dict[str, str]) -> Optional[str]:
@@ -357,5 +357,5 @@ class ParsedUserAgent(UserAgent):
             if (part := details.get(key)) is not None
         )

-    def __str__(self):
+    def __str__(self) -> str:
         return f'OS: {self.platform} - Browser: {self.browser} {self.version} - UA: {self.string}'


@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import hashlib
 import logging
 # import re
@@ -7,7 +9,7 @@ from collections import defaultdict
 from typing import Dict, Iterable, List, Optional, Set, Tuple
 from urllib.parse import urlsplit

-from har2tree import CrawledTree
+from har2tree import CrawledTree  # type: ignore[attr-defined]
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
@@ -23,11 +25,11 @@ class Indexing():
         self.redis_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection,
                                                          path=get_socket_path('indexing'), decode_responses=True)

-    def clear_indexes(self):
+    def clear_indexes(self) -> None:
         self.redis.flushdb()

     @property
-    def redis(self):
+    def redis(self) -> Redis:  # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)

     def new_internal_uuids(self, crawled_tree: CrawledTree) -> None:
@@ -45,25 +47,25 @@ class Indexing():
     # ###### Cookies ######

     @property
-    def cookies_names(self) -> List[Tuple[str, float]]:
+    def cookies_names(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)

     def cookies_names_number_domains(self, cookie_name: str) -> int:
         return self.redis.zcard(f'cn|{cookie_name}')

-    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
+    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)

-    def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
+    def get_cookie_domains(self, cookie_name: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)

-    def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
+    def get_cookies_names_captures(self, cookie_name: str) -> list[tuple[str, str]]:
         return [uuids.split('|') for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]

     def _reindex_cookies_capture(self, crawled_tree: CrawledTree) -> None:
         pipeline = self.redis.pipeline()
-        already_loaded: Set[Tuple[str, str]] = set()
-        already_cleaned_up: Set[str] = set()
+        already_loaded: set[tuple[str, str]] = set()
+        already_cleaned_up: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'cookies_received' not in urlnode.features:
                 continue
@@ -90,7 +92,7 @@ class Indexing():
         self.redis.sadd('indexed_cookies', crawled_tree.uuid)
         pipeline = self.redis.pipeline()
-        already_loaded: Set[Tuple[str, str]] = set()
+        already_loaded: set[tuple[str, str]] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'cookies_received' not in urlnode.features:
                 continue
@@ -131,13 +133,13 @@ class Indexing():
     # ###### Body hashes ######

     @property
-    def ressources(self) -> List[Tuple[str, float]]:
+    def ressources(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('body_hashes', 0, 200, withscores=True)

     def ressources_number_domains(self, h: str) -> int:
         return self.redis.zcard(f'bh|{h}')

-    def body_hash_fequency(self, body_hash: str) -> Dict[str, int]:
+    def body_hash_fequency(self, body_hash: str) -> dict[str, int]:
         pipeline = self.redis.pipeline()
         pipeline.zscore('body_hashes', body_hash)
         pipeline.zcard(f'bh|{body_hash}')
@@ -151,7 +153,7 @@ class Indexing():
     def _reindex_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
         # if the capture is regenerated, the hostnodes/urlnodes UUIDs are changed
-        cleaned_up_hashes: Set[str] = set()
+        cleaned_up_hashes: set[str] = set()
         pipeline = self.redis.pipeline()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             for h in urlnode.resources_hashes:
@@ -181,17 +183,17 @@ class Indexing():
                              f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
         pipeline.execute()

-    def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
+    def get_hash_uuids(self, body_hash: str) -> tuple[str, str, str]:
         """Use that to get a reference allowing to fetch a resource from one of the capture."""
-        capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')
+        capture_uuid = str(self.redis.srandmember(f'bh|{body_hash}|captures'))
         entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
         urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
         return capture_uuid, urlnode_uuid, hostnode_uuid

-    def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
-                               filter_capture_uuid: Optional[str]=None,
+    def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None,
+                               filter_capture_uuid: str | None=None,
                                limit: int=20,
-                               prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
+                               prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool]]]:
         '''Get the captures matching the hash.

         :param filter_url: URL of the hash we're searching for
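
Note on this hunk: the `str(...)` wrapper around `srandmember()` is a typing workaround rather than a behaviour change. redis-py types its responses very loosely (`bytes`/`str` unions or `Any`, depending on the version), so even though this connection pool is created with `decode_responses=True` and already yields `str`, the explicit cast is what lets the declared `tuple[str, str, str]` return type check. A reduced sketch (socket path hypothetical):

```python
from redis import Redis

r = Redis(unix_socket_path='/tmp/indexing.sock', decode_responses=True)
r.sadd('bh|deadbeef|captures', 'uuid-1', 'uuid-2')

member = r.srandmember('bh|deadbeef|captures')  # loosely typed for mypy
capture_uuid = str(member)                      # pin the type down explicitly
```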
@@ -199,7 +201,7 @@ class Indexing():
         :param limit: Max matching captures to return, -1 means unlimited.
         :param prefered_uuids: UUID cached right now, so we don't rebuild trees.
         '''
-        to_return: List[Tuple[str, str, str, bool]] = []
+        to_return: list[tuple[str, str, str, bool]] = []
         len_captures = self.redis.scard(f'bh|{body_hash}|captures')
         unlimited = False
         if limit == -1:
@@ -224,11 +226,11 @@ class Indexing():
                     break
         return len_captures, to_return

-    def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]:
+    def get_body_hash_domains(self, body_hash: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'bh|{body_hash}', 0, -1, withscores=True)

-    def get_body_hash_urls(self, body_hash: str) -> Dict[str, List[Dict[str, str]]]:
-        all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
+    def get_body_hash_urls(self, body_hash: str) -> dict[str, list[dict[str, str]]]:
+        all_captures: set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
         urls = defaultdict(list)
         for capture_uuid in list(all_captures):
             for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
@@ -239,19 +241,19 @@ class Indexing():
     # ###### HTTP Headers Hashes ######

     @property
-    def http_headers_hashes(self) -> List[Tuple[str, float]]:
+    def http_headers_hashes(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('hhhashes', 0, -1, withscores=True)

     def http_headers_hashes_number_captures(self, hhh: str) -> int:
         return self.redis.scard(f'hhhashes|{hhh}|captures')

-    def get_http_headers_hashes_captures(self, hhh: str) -> List[Tuple[str, str]]:
+    def get_http_headers_hashes_captures(self, hhh: str) -> list[tuple[str, str]]:
         return [uuids.split('|') for uuids in self.redis.smembers(f'hhhashes|{hhh}|captures')]

     def _reindex_http_headers_hashes_capture(self, crawled_tree: CrawledTree) -> None:
         pipeline = self.redis.pipeline()
-        already_loaded: Set[str] = set()
-        already_cleaned_up: Set[str] = set()
+        already_loaded: set[str] = set()
+        already_cleaned_up: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'hhhash' not in urlnode.features:
                 continue
@@ -276,7 +278,7 @@ class Indexing():
         self.redis.sadd('indexed_hhhashes', crawled_tree.uuid)
         pipeline = self.redis.pipeline()
-        already_loaded: Set[str] = set()
+        already_loaded: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'hhhash' not in urlnode.features:
                 continue
@@ -291,11 +293,11 @@ class Indexing():
     # ###### URLs and Domains ######

     @property
-    def urls(self) -> List[Tuple[str, float]]:
+    def urls(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('urls', 0, 200, withscores=True)

     @property
-    def hostnames(self) -> List[Tuple[str, float]]:
+    def hostnames(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('hostnames', 0, 200, withscores=True)

     def index_url_capture(self, crawled_tree: CrawledTree) -> None:
@@ -316,21 +318,21 @@ class Indexing():
             pipeline.sadd(f'urls|{md5}|captures', crawled_tree.uuid)
         pipeline.execute()

-    def get_captures_url(self, url: str) -> Set[str]:
+    def get_captures_url(self, url: str) -> set[str]:
         md5 = hashlib.md5(url.encode()).hexdigest()
         return self.redis.smembers(f'urls|{md5}|captures')

-    def get_captures_hostname(self, hostname: str) -> Set[str]:
+    def get_captures_hostname(self, hostname: str) -> set[str]:
         return self.redis.smembers(f'hostnames|{hostname}|captures')

     # ###### Categories ######

     @property
-    def categories(self) -> List[Tuple[str, int]]:
+    def categories(self) -> list[tuple[str, int]]:
         return [(c, int(score))
                 for c, score in self.redis.zrevrange('categories', 0, 200, withscores=True)]

-    def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]):
+    def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]) -> None:
         if not categories:
             return
         if self.redis.sismember('indexed_categories', capture_uuid):
@@ -345,5 +347,5 @@ class Indexing():
             pipeline.sadd(category, capture_uuid)
         pipeline.execute()

-    def get_captures_category(self, category: str) -> Set[str]:
+    def get_captures_category(self, category: str) -> set[str]:
         return self.redis.smembers(category)
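
Note on this file: nearly every `_reindex_*` method above follows the same pattern: build one `pipeline()`, queue all the ZINCRBY/SADD calls for the whole crawled tree, then `execute()` once, so the commands travel in a single round trip instead of one per indexed item. A minimal sketch of the pattern (socket path hypothetical):

```python
from redis import Redis

r = Redis(unix_socket_path='/tmp/indexing.sock', decode_responses=True)

pipeline = r.pipeline()
for i, name in enumerate(['session', 'tracker', 'csrf']):
    pipeline.zincrby('cookies_names', 1, name)         # buffered client-side
    pipeline.sadd(f'cn|{name}|captures', f'uuid-{i}')  # still buffered
results = pipeline.execute()                           # single round trip
```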


@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import base64
 import copy
 import gzip
@@ -22,7 +24,7 @@ from uuid import uuid4
 from zipfile import ZipFile

 from defang import defang  # type: ignore
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from lacuscore import (LacusCore,
                        CaptureStatus as CaptureStatusCore,
                        # CaptureResponse as CaptureResponseCore)
@@ -30,15 +32,15 @@ from lacuscore import (LacusCore,
                        CaptureSettings as CaptureSettingsCore)
 from PIL import Image, UnidentifiedImageError
 from playwrightcapture import get_devices
-from pylacus import (PyLacus,
+from pylacus import (PyLacus,  # type: ignore[attr-defined]
                      CaptureStatus as CaptureStatusPy
                      # CaptureResponse as CaptureResponsePy,
                      # CaptureResponseJson as CaptureResponseJsonPy,
                      # CaptureSettings as CaptureSettingsPy
                      )
-from pymisp import MISPAttribute, MISPEvent, MISPObject
-from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable
-from pylookyloomonitoring import PyLookylooMonitoring
+from pymisp import MISPAttribute, MISPEvent, MISPObject  # type: ignore[attr-defined]
+from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable  # type: ignore[attr-defined]
+from pylookyloomonitoring import PyLookylooMonitoring  # type: ignore[attr-defined]
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
@@ -62,13 +64,13 @@ if TYPE_CHECKING:

 class CaptureSettings(CaptureSettingsCore, total=False):
     '''The capture settings that can be passed to Lookyloo'''
-    listing: Optional[int]
-    not_queued: Optional[int]
-    auto_report: Optional[Union[bool, str, Dict[str, str]]]
-    dnt: Optional[str]
-    browser_name: Optional[str]
-    os: Optional[str]
-    parent: Optional[str]
+    listing: int | None
+    not_queued: int | None
+    auto_report: bool | str | dict[str, str] | None
+    dnt: str | None
+    browser_name: str | None
+    os: str | None
+    parent: str | None


 class Lookyloo():
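
Note on this hunk: `total=False` means `CaptureSettings` is a TypedDict whose keys may all be absent, and each member is additionally allowed to be `None`; those are two different things, which the new union spelling keeps visible. A reduced illustration with hypothetical keys:

```python
from __future__ import annotations

from typing import TypedDict

class DemoSettings(TypedDict, total=False):
    # total=False: every key may be missing entirely.
    listing: int | None
    parent: str | None

settings: DemoSettings = {'listing': 1}  # OK: 'parent' is simply absent
settings['parent'] = None                # OK: present but explicitly None
# settings['depth'] = 3                  # mypy error: unknown key
```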
@@ -153,13 +155,13 @@ class Lookyloo():
         self.lacus

     @property
-    def redis(self):
+    def redis(self) -> Redis:  # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)

     @cached_property
-    def lacus(self):
+    def lacus(self) -> PyLacus | LacusCore:
         has_remote_lacus = False
-        self._lacus: Union[PyLacus, LacusCore]
+        self._lacus: PyLacus | LacusCore
         if get_config('generic', 'remote_lacus'):
             remote_lacus_config = get_config('generic', 'remote_lacus')
             if remote_lacus_config.get('enable'):
@@ -180,7 +182,7 @@ class Lookyloo():

         if not has_remote_lacus:
             # We need a redis connector that doesn't decode.
-            redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
+            redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))  # type: ignore[type-arg]
             self._lacus = LacusCore(redis, tor_proxy=get_config('generic', 'tor_proxy'),
                                     max_capture_time=get_config('generic', 'max_capture_time'),
                                     only_global_lookups=get_config('generic', 'only_global_lookups'),
@@ -188,14 +190,14 @@ class Lookyloo():
         return self._lacus

     def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str,
-                    legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
+                    legitimate: bool, malicious: bool, details: dict[str, dict[str, str]]) -> None:
         '''Adds context information to a capture or a URL node'''
         if malicious:
             self.context.add_malicious(ressource_hash, details['malicious'])
         if legitimate:
             self.context.add_legitimate(ressource_hash, details['legitimate'])

-    def add_to_legitimate(self, capture_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None):
+    def add_to_legitimate(self, capture_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> None:
         '''Mark a full capture as legitimate.
         Iterates over all the nodes and mark them all as legitimate too.'''
         ct = self.get_crawled_tree(capture_uuid)
@@ -225,12 +227,12 @@ class Lookyloo():
         ct = self.get_crawled_tree(capture_uuid)
         return ct.root_hartree.get_host_node_by_uuid(node_uuid)

-    def get_statistics(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def get_statistics(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get the statistics of a capture.'''
         ct = self.get_crawled_tree(capture_uuid)
         return ct.root_hartree.stats

-    def get_info(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def get_info(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get basic information about the capture.'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
@@ -254,7 +256,7 @@ class Lookyloo():
         to_return['referer'] = cache.referer if cache.referer else ''
         return to_return

-    def get_meta(self, capture_uuid: str, /) -> Dict[str, str]:
+    def get_meta(self, capture_uuid: str, /) -> dict[str, str]:
         '''Get the meta informations from a capture (mostly, details about the User Agent used.)'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
@@ -294,7 +296,7 @@ class Lookyloo():
                 return json.load(f)
         return {}

-    def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def categories_capture(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get all the categories related to a capture, in MISP Taxonomies format'''
         categ_file = self._captures_index[capture_uuid].capture_dir / 'categories'
         # get existing categories if possible
@@ -337,7 +339,7 @@ class Lookyloo():
         with categ_file.open('w') as f:
             f.writelines(f'{t}\n' for t in current_categories)

-    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> dict[str, Any]:
         '''Launch the 3rd party modules on a capture.
         It uses the cached result *if* the module was triggered the same day.
         The `force` flag re-triggers the module regardless of the cache.'''
@@ -350,8 +352,8 @@ class Lookyloo():
         self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
         self.hashlookup.capture_default_trigger(ct, auto_trigger=auto_trigger)

-        to_return: Dict[str, Dict] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {},
-                                      'URLhaus': {}}
+        to_return: dict[str, dict[str, Any]] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {},
+                                                'URLhaus': {}}
         if cache := self.capture_cache(capture_uuid):
             to_return['PhishingInitiative'] = self.pi.capture_default_trigger(cache, force=force, auto_trigger=auto_trigger)
             to_return['VirusTotal'] = self.vt.capture_default_trigger(cache, force=force, auto_trigger=auto_trigger)
@@ -363,7 +365,7 @@ class Lookyloo():
             to_return['URLhaus'] = self.urlhaus.capture_default_trigger(cache, auto_trigger=auto_trigger)
         return to_return

-    def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
+    def get_modules_responses(self, capture_uuid: str, /) -> dict[str, Any] | None:
         '''Get the responses of the modules from the cached responses on the disk'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
@@ -373,7 +375,7 @@ class Lookyloo():
             self.logger.warning(f'The capture {capture_uuid} does not have a URL in the cache, it is broken.')
             return None

-        to_return: Dict[str, Any] = {}
+        to_return: dict[str, Any] = {}
         if self.vt.available:
             to_return['vt'] = {}
             if hasattr(cache, 'redirects') and cache.redirects:
@@ -416,7 +418,7 @@ class Lookyloo():
                     to_return['urlscan']['result'] = result
         return to_return

-    def get_historical_lookups(self, capture_uuid: str, /, force: bool=False) -> Dict:
+    def get_historical_lookups(self, capture_uuid: str, /, force: bool=False) -> dict[str, Any]:
         # this method is only trigered when the user wants to get more details about the capture
         # by looking at Passive DNS systems, check if there are hits in the current capture
         # in another one and things like that. The trigger_modules method is for getting
@@ -425,7 +427,7 @@ class Lookyloo():
         if not cache:
             self.logger.warning(f'Unable to get the modules responses unless the capture {capture_uuid} is cached')
             return {}
-        to_return: Dict[str, Any] = defaultdict(dict)
+        to_return: dict[str, Any] = defaultdict(dict)
         if self.riskiq.available:
             try:
                 self.riskiq.capture_default_trigger(cache)
@@ -461,7 +463,7 @@ class Lookyloo():
     def update_tree_cache_info(self, process_id: int, classname: str) -> None:
         self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status()))

-    def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]:
+    def sorted_capture_cache(self, capture_uuids: Iterable[str] | None=None, cached_captures_only: bool=True, index_cut_time: datetime | None=None) -> list[CaptureCache]:
         '''Get all the captures in the cache, sorted by timestamp (new -> old).
         By default, this method will only return the captures that are currently cached.'''
         # Make sure we do not try to load archived captures that would still be in 'lookup_dirs'
@@ -489,13 +491,13 @@ class Lookyloo():
             # Do not try to build pickles
             capture_uuids = set(capture_uuids) & self._captures_index.cached_captures

-        all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
+        all_cache: list[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
                                          if self.capture_cache(uuid)
                                          and hasattr(self._captures_index[uuid], 'timestamp')]
         all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
         return all_cache

-    def get_capture_status(self, capture_uuid: str, /) -> Union[CaptureStatusCore, CaptureStatusPy]:
+    def get_capture_status(self, capture_uuid: str, /) -> CaptureStatusCore | CaptureStatusPy:
         '''Returns the status (queued, ongoing, done, or UUID unknown)'''
         if self.redis.hexists('lookup_dirs', capture_uuid):
             return CaptureStatusCore.DONE
@@ -520,7 +522,7 @@ class Lookyloo():
                 return CaptureStatusCore.ONGOING
             return lacus_status

-    def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> Optional[CaptureCache]:
+    def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> CaptureCache | None:
         """Get the cache from redis, rebuild the tree if the internal UUID changed => slow"""
         try:
             cache = self._captures_index[capture_uuid]
@@ -598,7 +600,7 @@ class Lookyloo():
             query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent']

         # NOTE: the document must be base64 encoded
-        document: Optional[Union[str, bytes]] = query.pop('document', None)
+        document: str | bytes | None = query.pop('document', None)
         if document:
             if isinstance(document, bytes):
                 query['document'] = base64.b64encode(document).decode()
@@ -631,17 +633,16 @@ class Lookyloo():
         query = self._prepare_lacus_query(query)

         priority = get_priority(source, user, authenticated)
-        query['priority'] = priority
         if priority < -100:
             # Someone is probably abusing the system with useless URLs, remove them from the index
             query['listing'] = 0
         try:
-            perma_uuid = self.lacus.enqueue(
+            perma_uuid = self.lacus.enqueue(  # type: ignore[misc]
                 url=query.get('url', None),
                 document_name=query.get('document_name', None),
                 document=query.get('document', None),
                 # depth=query.get('depth', 0),
-                browser=query.get('browser', None),
+                browser=query.get('browser', None),  # type: ignore[arg-type]
                 device_name=query.get('device_name', None),
                 user_agent=query.get('user_agent', None),
                 proxy=self.global_proxy if self.global_proxy else query.get('proxy', None),
@@ -659,7 +660,7 @@ class Lookyloo():
                 with_favicon=query.get('with_favicon', True),
                 # force=query.get('force', False),
                 # recapture_interval=query.get('recapture_interval', 300),
-                priority=query.get('priority', 0)
+                priority=priority
             )
         except Exception as e:
             self.logger.critical(f'Unable to enqueue capture: {e}')
@@ -670,7 +671,7 @@ class Lookyloo():
                 and self.redis.zscore('to_capture', perma_uuid) is None):  # capture ongoing

             # Make the settings redis compatible
-            mapping_capture: Dict[str, Union[bytes, float, int, str]] = {}
+            mapping_capture: dict[str, bytes | float | int | str] = {}
             for key, value in query.items():
                 if isinstance(value, bool):
                     mapping_capture[key] = 1 if value else 0
@@ -681,15 +682,15 @@ class Lookyloo():
                     mapping_capture[key] = value  # type: ignore

             p = self.redis.pipeline()
-            p.zadd('to_capture', {perma_uuid: query['priority']})
-            p.hset(perma_uuid, mapping=mapping_capture)
+            p.zadd('to_capture', {perma_uuid: priority})
+            p.hset(perma_uuid, mapping=mapping_capture)  # type: ignore[arg-type]
             p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
             p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
             p.execute()

         return perma_uuid

-    def takedown_details(self, hostnode: HostNode) -> Dict[str, Any]:
+    def takedown_details(self, hostnode: HostNode) -> dict[str, Any]:
         if not self.uwhois.available:
             self.logger.warning('UWhois module not enabled, unable to use this method')
             raise LookylooException('UWhois module not enabled, unable to use this method')
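
Note on these hunks: the enqueue path now keeps `priority` in a local variable and hands it directly to both `lacus.enqueue()` and the `to_capture` sorted set, instead of round-tripping it through the `query` dict, where the old `query.get('priority', 0)` would silently fall back to 0 if the key was ever dropped. The sorted set effectively works as a priority queue; a reduced sketch (socket path hypothetical, and how the worker drains the set is an assumption here, not shown in this diff):

```python
from __future__ import annotations

from redis import Redis

r = Redis(unix_socket_path='/tmp/cache.sock', decode_responses=True)

def enqueue(uuid: str, priority: int) -> None:
    r.zadd('to_capture', {uuid: priority})  # score == priority

def next_capture() -> str | None:
    entries = r.zpopmax('to_capture')  # highest score first: [(member, score)] or []
    return entries[0][0] if entries else None

enqueue('uuid-low', -10)
enqueue('uuid-high', 5)
print(next_capture())  # -> 'uuid-high'
```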
@@ -740,7 +741,7 @@ class Lookyloo():
             to_return['all_emails'] = list(to_return['all_emails'])
         return to_return

-    def contacts(self, capture_uuid: str, /) -> List[Dict[str, Any]]:
+    def contacts(self, capture_uuid: str, /) -> list[dict[str, Any]]:
         capture = self.get_crawled_tree(capture_uuid)
         rendered_hostnode = self.get_hostnode_from_tree(capture_uuid, capture.root_hartree.rendered_node.hostnode_uuid)
         result = []
@@ -749,7 +750,7 @@ class Lookyloo():
             result.append(self.takedown_details(rendered_hostnode))
         return result

-    def send_mail(self, capture_uuid: str, /, email: str='', comment: Optional[str]=None) -> None:
+    def send_mail(self, capture_uuid: str, /, email: str='', comment: str | None=None) -> None:
         '''Send an email notification regarding a specific capture'''
         if not get_config('generic', 'enable_mail_notification'):
             return
@@ -856,7 +857,7 @@ class Lookyloo():
     def get_potential_favicons(self, capture_uuid: str, /, all_favicons: Literal[True], for_datauri: Literal[False]) -> BytesIO:
         ...

-    def get_potential_favicons(self, capture_uuid: str, /, all_favicons: bool=False, for_datauri: bool=False) -> Union[BytesIO, str]:
+    def get_potential_favicons(self, capture_uuid: str, /, all_favicons: bool=False, for_datauri: bool=False) -> BytesIO | str:
         '''Get rendered HTML'''
         fav = self._get_raw(capture_uuid, 'potential_favicons.ico', all_favicons)
         if not all_favicons and for_datauri:
@@ -867,7 +868,7 @@ class Lookyloo():
         '''Get rendered HTML'''
         return self._get_raw(capture_uuid, 'html', all_html)

-    def get_data(self, capture_uuid: str, /) -> Tuple[str, BytesIO]:
+    def get_data(self, capture_uuid: str, /) -> tuple[str, BytesIO]:
         '''Get the data'''
         return self._get_raw(capture_uuid, 'data.filename', False).getvalue().decode(), self._get_raw(capture_uuid, 'data', False)
@@ -879,7 +880,7 @@ class Lookyloo():
         '''Get the screenshot(s) of the rendered page'''
         return self._get_raw(capture_uuid, 'png', all_files=False)

-    def get_screenshot_thumbnail(self, capture_uuid: str, /, for_datauri: bool=False, width: int=64) -> Union[str, BytesIO]:
+    def get_screenshot_thumbnail(self, capture_uuid: str, /, for_datauri: bool=False, width: int=64) -> str | BytesIO:
         '''Get the thumbnail of the rendered page. Always crop to a square.'''
         to_return = BytesIO()
         size = width, width
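
Note on this hunk: `get_screenshot_thumbnail` squares and shrinks the capture screenshot with Pillow and can return it as a data URI. A self-contained sketch of that flow; the exact crop box used by Lookyloo is an assumption here (top-left square):

```python
import base64
from io import BytesIO

from PIL import Image

def screenshot_thumbnail(png: BytesIO, width: int = 64) -> str:
    to_return = BytesIO()
    with Image.open(png) as img:
        side = min(img.size)
        img = img.crop((0, 0, side, side))  # crop to a square (assumed anchor)
        img.thumbnail((width, width))       # shrink in place, aspect ratio kept
        img.save(to_return, 'png')
    return f"data:image/png;base64,{base64.b64encode(to_return.getvalue()).decode()}"

# Usage: thumbnail a synthetic 256x128 screenshot.
buf = BytesIO()
Image.new('RGB', (256, 128), 'red').save(buf, 'png')
buf.seek(0)
print(screenshot_thumbnail(buf)[:40])
```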
@ -921,12 +922,12 @@ class Lookyloo():
'''Get all the files related to this capture.''' '''Get all the files related to this capture.'''
return self._get_raw(capture_uuid) return self._get_raw(capture_uuid)
def get_urls_rendered_page(self, capture_uuid: str, /) -> List[str]: def get_urls_rendered_page(self, capture_uuid: str, /) -> list[str]:
ct = self.get_crawled_tree(capture_uuid) ct = self.get_crawled_tree(capture_uuid)
return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page) return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page)
- set(ct.root_hartree.all_url_requests.keys())) - set(ct.root_hartree.all_url_requests.keys()))
def get_body_hash_investigator(self, body_hash: str, /) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float]]]: def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float]]]:
'''Returns all the captures related to a hash (sha512), used in the web interface.''' '''Returns all the captures related to a hash (sha512), used in the web interface.'''
total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1) total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1)
cached_captures = self.sorted_capture_cache([d[0] for d in details]) cached_captures = self.sorted_capture_cache([d[0] for d in details])
@ -934,7 +935,7 @@ class Lookyloo():
domains = self.indexing.get_body_hash_domains(body_hash) domains = self.indexing.get_body_hash_domains(body_hash)
return captures, domains return captures, domains
def get_body_hash_full(self, body_hash: str, /) -> Tuple[Dict[str, List[Dict[str, str]]], BytesIO]: def get_body_hash_full(self, body_hash: str, /) -> tuple[dict[str, list[dict[str, str]]], BytesIO]:
'''Returns a lot of information about the hash (sha512) and the hits in the instance. '''Returns a lot of information about the hash (sha512) and the hits in the instance.
Also contains the data (base64 encoded)''' Also contains the data (base64 encoded)'''
details = self.indexing.get_body_hash_urls(body_hash) details = self.indexing.get_body_hash_urls(body_hash)
@ -969,9 +970,9 @@ class Lookyloo():
# TODO: Couldn't find the file anywhere. Maybe return a warning in the file? # TODO: Couldn't find the file anywhere. Maybe return a warning in the file?
return details, BytesIO() return details, BytesIO()
def get_all_body_hashes(self, capture_uuid: str, /) -> Dict[str, Dict[str, Union[URLNode, int]]]: def get_all_body_hashes(self, capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]:
ct = self.get_crawled_tree(capture_uuid) ct = self.get_crawled_tree(capture_uuid)
to_return: Dict[str, Dict[str, Union[URLNode, int]]] = defaultdict() to_return: dict[str, dict[str, URLNode | int]] = defaultdict()
for node in ct.root_hartree.url_tree.traverse(): for node in ct.root_hartree.url_tree.traverse():
if node.empty_response or node.body_hash in to_return: if node.empty_response or node.body_hash in to_return:
# If we have the same hash more than once, skip # If we have the same hash more than once, skip
@ -981,24 +982,24 @@ class Lookyloo():
to_return[node.body_hash] = {'node': node, 'total_captures': total_captures} to_return[node.body_hash] = {'node': node, 'total_captures': total_captures}
return to_return return to_return
def get_latest_url_capture(self, url: str, /) -> Optional[CaptureCache]: def get_latest_url_capture(self, url: str, /) -> CaptureCache | None:
'''Get the most recent capture with this URL''' '''Get the most recent capture with this URL'''
captures = self.sorted_capture_cache(self.indexing.get_captures_url(url)) captures = self.sorted_capture_cache(self.indexing.get_captures_url(url))
if captures: if captures:
return captures[0] return captures[0]
return None return None
def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> List[Dict]: def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
'''Get the most recent captures and URL nodes where the URL has been seen.''' '''Get the most recent captures and URL nodes where the URL has been seen.'''
captures = self.sorted_capture_cache(self.indexing.get_captures_url(url), cached_captures_only=cached_captures_only) captures = self.sorted_capture_cache(self.indexing.get_captures_url(url), cached_captures_only=cached_captures_only)
to_return: List[Dict] = [] to_return: list[dict[str, Any]] = []
for capture in captures[:limit]: for capture in captures[:limit]:
ct = self.get_crawled_tree(capture.uuid) ct = self.get_crawled_tree(capture.uuid)
to_append: Dict[str, Union[str, Dict]] = {'capture_uuid': capture.uuid, to_append: dict[str, str | dict[str, Any]] = {'capture_uuid': capture.uuid,
'start_timestamp': capture.timestamp.isoformat(), 'start_timestamp': capture.timestamp.isoformat(),
'title': capture.title} 'title': capture.title}
urlnodes: Dict[str, Dict[str, str]] = {} urlnodes: dict[str, dict[str, str]] = {}
for urlnode in ct.root_hartree.url_tree.search_nodes(name=url): for urlnode in ct.root_hartree.url_tree.search_nodes(name=url):
urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(), urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(),
'hostnode_uuid': urlnode.hostnode_uuid} 'hostnode_uuid': urlnode.hostnode_uuid}
@ -1008,19 +1009,20 @@ class Lookyloo():
to_return.append(to_append) to_return.append(to_append)
return to_return return to_return
def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> List[Dict]: def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
'''Get the most recent captures and URL nodes where the hostname has been seen.''' '''Get the most recent captures and URL nodes where the hostname has been seen.'''
captures = self.sorted_capture_cache(self.indexing.get_captures_hostname(hostname), cached_captures_only=cached_captures_only) captures = self.sorted_capture_cache(self.indexing.get_captures_hostname(hostname), cached_captures_only=cached_captures_only)
to_return: List[Dict] = [] to_return: list[dict[str, Any]] = []
for capture in captures[:limit]: for capture in captures[:limit]:
ct = self.get_crawled_tree(capture.uuid) ct = self.get_crawled_tree(capture.uuid)
to_append: Dict[str, Union[str, List, Dict]] = {'capture_uuid': capture.uuid, to_append: dict[str, str | list[Any] | dict[str, Any]] = {
'start_timestamp': capture.timestamp.isoformat(), 'capture_uuid': capture.uuid,
'title': capture.title} 'start_timestamp': capture.timestamp.isoformat(),
hostnodes: List[str] = [] 'title': capture.title}
hostnodes: list[str] = []
if with_urls_occurrences: if with_urls_occurrences:
urlnodes: Dict[str, Dict[str, str]] = {} urlnodes: dict[str, dict[str, str]] = {}
for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname): for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname):
hostnodes.append(hostnode.uuid) hostnodes.append(hostnode.uuid)
if with_urls_occurrences: if with_urls_occurrences:
@ -1036,7 +1038,7 @@ class Lookyloo():
to_return.append(to_append) to_return.append(to_append)
return to_return return to_return
def get_cookie_name_investigator(self, cookie_name: str, /) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float, List[Tuple[str, float]]]]]: def get_cookie_name_investigator(self, cookie_name: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float, list[tuple[str, float]]]]]:
'''Returns all the captures related to a cookie name entry, used in the web interface.''' '''Returns all the captures related to a cookie name entry, used in the web interface.'''
cached_captures = self.sorted_capture_cache([entry[0] for entry in self.indexing.get_cookies_names_captures(cookie_name)]) cached_captures = self.sorted_capture_cache([entry[0] for entry in self.indexing.get_cookies_names_captures(cookie_name)])
captures = [(cache.uuid, cache.title) for cache in cached_captures] captures = [(cache.uuid, cache.title) for cache in cached_captures]
@ -1044,7 +1046,7 @@ class Lookyloo():
for domain, freq in self.indexing.get_cookie_domains(cookie_name)] for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
return captures, domains return captures, domains
def get_hhh_investigator(self, hhh: str, /) -> Tuple[List[Tuple[str, str, str, str]], List[Tuple[str, str]]]: def get_hhh_investigator(self, hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
'''Returns all the captures related to a cookie name entry, used in the web interface.''' '''Returns all the captures related to a cookie name entry, used in the web interface.'''
all_captures = dict(self.indexing.get_http_headers_hashes_captures(hhh)) all_captures = dict(self.indexing.get_http_headers_hashes_captures(hhh))
if cached_captures := self.sorted_capture_cache([entry for entry in all_captures]): if cached_captures := self.sorted_capture_cache([entry for entry in all_captures]):
@ -1063,11 +1065,11 @@ class Lookyloo():
return captures, headers return captures, headers
return [], [] return [], []
def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Tuple[int, Dict[str, List[Tuple[str, str, str, str, str]]]]: def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]:
'''Search all the captures a specific hash was seen. '''Search all the captures a specific hash was seen.
If a URL is given, it splits the results if the hash is seen on the same URL or an other one. If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
Capture UUID avoids duplicates on the same capture''' Capture UUID avoids duplicates on the same capture'''
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
prefered_uuids=set(self._captures_index.keys())) prefered_uuids=set(self._captures_index.keys()))
for h_capture_uuid, url_uuid, url_hostname, same_url in details: for h_capture_uuid, url_uuid, url_hostname, same_url in details:
@ -1082,7 +1084,7 @@ class Lookyloo():
captures_list['different_url'].sort(key=lambda y: y[3]) captures_list['different_url'].sort(key=lambda y: y[3])
return total_captures, captures_list return total_captures, captures_list
def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: Optional[str]) -> Optional[Tuple[str, BytesIO, str]]: def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: str | None) -> tuple[str, BytesIO, str] | None:
'''Get a specific resource from a URL node. If a hash is also given, we want an embedded resource''' '''Get a specific resource from a URL node. If a hash is also given, we want an embedded resource'''
try: try:
url = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid) url = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
@ -1108,7 +1110,7 @@ class Lookyloo():
return 'embedded_ressource.bin', BytesIO(blob.getvalue()), mimetype return 'embedded_ressource.bin', BytesIO(blob.getvalue()), mimetype
return None return None
def __misp_add_vt_to_URLObject(self, obj: MISPObject) -> Optional[MISPObject]: def __misp_add_vt_to_URLObject(self, obj: MISPObject) -> MISPObject | None:
urls = obj.get_attributes_by_relation('url') urls = obj.get_attributes_by_relation('url')
if not urls: if not urls:
return None return None
@ -1124,7 +1126,7 @@ class Lookyloo():
obj.add_reference(vt_obj, 'analysed-with') obj.add_reference(vt_obj, 'analysed-with')
return vt_obj return vt_obj
def __misp_add_urlscan_to_event(self, capture_uuid: str, visibility: str) -> Optional[MISPAttribute]: def __misp_add_urlscan_to_event(self, capture_uuid: str, visibility: str) -> MISPAttribute | None:
if cache := self.capture_cache(capture_uuid): if cache := self.capture_cache(capture_uuid):
response = self.urlscan.url_submit(cache, visibility) response = self.urlscan.url_submit(cache, visibility)
if 'result' in response: if 'result' in response:
@ -1134,7 +1136,7 @@ class Lookyloo():
return attribute return attribute
return None return None
def misp_export(self, capture_uuid: str, /, with_parent: bool=False) -> Union[List[MISPEvent], Dict[str, str]]: def misp_export(self, capture_uuid: str, /, with_parent: bool=False) -> list[MISPEvent] | dict[str, str]:
'''Export a capture in MISP format. You can POST the return of this method '''Export a capture in MISP format. You can POST the return of this method
directly to a MISP instance and it will create an event.''' directly to a MISP instance and it will create an event.'''
cache = self.capture_cache(capture_uuid) cache = self.capture_cache(capture_uuid)
@ -1200,7 +1202,7 @@ class Lookyloo():
return [event] return [event]
def get_misp_occurrences(self, capture_uuid: str, /, *, instance_name: Optional[str]=None) -> Optional[Tuple[Dict[str, Set[str]], str]]: def get_misp_occurrences(self, capture_uuid: str, /, *, instance_name: str | None=None) -> tuple[dict[str, set[str]], str] | None:
if instance_name is None: if instance_name is None:
misp = self.misps.default_misp misp = self.misps.default_misp
elif self.misps.get(instance_name) is not None: elif self.misps.get(instance_name) is not None:
@ -1217,7 +1219,7 @@ class Lookyloo():
self.logger.warning(f'Unable to get the module responses unless the tree ({capture_uuid}) is cached.') self.logger.warning(f'Unable to get the module responses unless the tree ({capture_uuid}) is cached.')
return None return None
nodes_to_lookup = ct.root_hartree.rendered_node.get_ancestors() + [ct.root_hartree.rendered_node] nodes_to_lookup = ct.root_hartree.rendered_node.get_ancestors() + [ct.root_hartree.rendered_node]
to_return: Dict[str, Set[str]] = defaultdict(set) to_return: dict[str, set[str]] = defaultdict(set)
for node in nodes_to_lookup: for node in nodes_to_lookup:
hits = misp.lookup(node, ct.root_hartree.get_host_node_by_uuid(node.hostnode_uuid)) hits = misp.lookup(node, ct.root_hartree.get_host_node_by_uuid(node.hostnode_uuid))
for event_id, values in hits.items(): for event_id, values in hits.items():
@ -1226,7 +1228,7 @@ class Lookyloo():
to_return[event_id].update(values) to_return[event_id].update(values)
return to_return, misp.client.root_url return to_return, misp.client.root_url
def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List[URLNode]]]: def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> dict[str, set[str]] | dict[str, list[URLNode]]:
"""Build (on demand) hashes for all the ressources of the tree, using the alorighm provided by the user. """Build (on demand) hashes for all the ressources of the tree, using the alorighm provided by the user.
If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes an they're build If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes an they're build
with the tree. This method is computing the hashes when you query it, so it is slower.""" with the tree. This method is computing the hashes when you query it, so it is slower."""
@ -1236,7 +1238,7 @@ class Lookyloo():
return {h: {node.name for node in nodes} for h, nodes in hashes.items()} return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
return hashes return hashes
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]: def merge_hashlookup_tree(self, tree_uuid: str, /) -> tuple[dict[str, dict[str, Any]], int]:
if not self.hashlookup.available: if not self.hashlookup.available:
raise LookylooException('Hashlookup module not enabled.') raise LookylooException('Hashlookup module not enabled.')
hashes_tree = self.get_hashes_with_context(tree_uuid, algorithm='sha1') hashes_tree = self.get_hashes_with_context(tree_uuid, algorithm='sha1')
@ -1253,20 +1255,20 @@ class Lookyloo():
with hashlookup_file.open() as f: with hashlookup_file.open() as f:
hashlookup_entries = json.load(f) hashlookup_entries = json.load(f)
to_return: Dict[str, Dict[str, Any]] = defaultdict(dict) to_return: dict[str, dict[str, Any]] = defaultdict(dict)
for sha1 in hashlookup_entries.keys(): for sha1 in hashlookup_entries.keys():
to_return[sha1]['nodes'] = hashes_tree[sha1] to_return[sha1]['nodes'] = hashes_tree[sha1]
to_return[sha1]['hashlookup'] = hashlookup_entries[sha1] to_return[sha1]['hashlookup'] = hashlookup_entries[sha1]
return to_return, len(hashes_tree) return to_return, len(hashes_tree)
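`to_return` above is built as a `defaultdict(dict)` yet annotated as a plain `dict[str, dict[str, Any]]`; that is sound because `defaultdict` subclasses `dict`, and the default factory removes the per-key existence checks when filling nested entries. A small self-contained sketch:

from __future__ import annotations

from collections import defaultdict
from typing import Any

# Annotating the defaultdict as a plain dict (as merge_hashlookup_tree
# does) type-checks, since defaultdict is a dict subclass.
merged: dict[str, dict[str, Any]] = defaultdict(dict)
for sha1, entry in [('da39a3ee', {'source': 'hashlookup'})]:  # toy data
    merged[sha1]['hashlookup'] = entry  # inner dict auto-created
print(dict(merged))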
def get_hashes(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]: def get_hashes(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
"""Return hashes (sha512) of resources. """Return hashes (sha512) of resources.
Only tree_uuid: All the hashes Only tree_uuid: All the hashes
tree_uuid and hostnode_uuid: hashes of all the resources in that hostnode (including embedded resources) tree_uuid and hostnode_uuid: hashes of all the resources in that hostnode (including embedded resources)
tree_uuid, hostnode_uuid, and urlnode_uuid: hash of the URL node body, and embedded resources tree_uuid, hostnode_uuid, and urlnode_uuid: hash of the URL node body, and embedded resources
""" """
container: Union[CrawledTree, HostNode, URLNode] container: CrawledTree | HostNode | URLNode
if urlnode_uuid: if urlnode_uuid:
container = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid) container = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
elif hostnode_uuid: elif hostnode_uuid:
@ -1275,7 +1277,7 @@ class Lookyloo():
container = self.get_crawled_tree(tree_uuid) container = self.get_crawled_tree(tree_uuid)
return get_resources_hashes(container) return get_resources_hashes(container)
def get_hostnames(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]: def get_hostnames(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
"""Return all the unique hostnames: """Return all the unique hostnames:
* of a complete tree if no hostnode_uuid and urlnode_uuid are given * of a complete tree if no hostnode_uuid and urlnode_uuid are given
* of a HostNode if hostnode_uuid is given * of a HostNode if hostnode_uuid is given
@ -1291,7 +1293,7 @@ class Lookyloo():
ct = self.get_crawled_tree(tree_uuid) ct = self.get_crawled_tree(tree_uuid)
return {node.name for node in ct.root_hartree.hostname_tree.traverse()} return {node.name for node in ct.root_hartree.hostname_tree.traverse()}
def get_urls(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]: def get_urls(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
"""Return all the unique URLs: """Return all the unique URLs:
* of a complete tree if no hostnode_uuid and urlnode_uuid are given * of a complete tree if no hostnode_uuid and urlnode_uuid are given
* of a HostNode if hostnode_uuid is given * of a HostNode if hostnode_uuid is given
@ -1307,18 +1309,18 @@ class Lookyloo():
ct = self.get_crawled_tree(tree_uuid) ct = self.get_crawled_tree(tree_uuid)
return {node.name for node in ct.root_hartree.url_tree.traverse()} return {node.name for node in ct.root_hartree.url_tree.traverse()}
def get_playwright_devices(self) -> Dict: def get_playwright_devices(self) -> dict[str, Any]:
"""Get the preconfigured devices from Playwright""" """Get the preconfigured devices from Playwright"""
return get_devices() return get_devices()
def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]: def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> tuple[HostNode, list[dict[str, Any]]]:
'''Gather all the information needed to display the Hostnode investigator popup.''' '''Gather all the information needed to display the Hostnode investigator popup.'''
def normalize_known_content(h: str, /, known_content: Dict[str, Any], url: URLNode) -> Tuple[Optional[Union[str, List[Any]]], Optional[Tuple[bool, Any]]]: def normalize_known_content(h: str, /, known_content: dict[str, Any], url: URLNode) -> tuple[str | list[Any] | None, tuple[bool, Any] | None]:
''' There are a few different sources to figure out known vs. legitimate content, ''' There are a few different sources to figure out known vs. legitimate content,
this method normalizes it for the web interface.''' this method normalizes it for the web interface.'''
known: Optional[Union[str, List[Any]]] = None known: str | list[Any] | None = None
legitimate: Optional[Tuple[bool, Any]] = None legitimate: tuple[bool, Any] | None = None
if h not in known_content: if h not in known_content:
return known, legitimate return known, legitimate
@ -1340,13 +1342,13 @@ class Lookyloo():
known_content = self.context.find_known_content(hostnode) known_content = self.context.find_known_content(hostnode)
self.uwhois.query_whois_hostnode(hostnode) self.uwhois.query_whois_hostnode(hostnode)
urls: List[Dict[str, Any]] = [] urls: list[dict[str, Any]] = []
for url in hostnode.urls: for url in hostnode.urls:
# For the popup, we need: # For the popup, we need:
# * https vs http # * https vs http
# * everything after the domain # * everything after the domain
# * the full URL # * the full URL
to_append: Dict[str, Any] = { to_append: dict[str, Any] = {
'encrypted': url.name.startswith('https'), 'encrypted': url.name.startswith('https'),
'url_path': url.name.split('/', 3)[-1], 'url_path': url.name.split('/', 3)[-1],
'url_object': url, 'url_object': url,
@ -1389,7 +1391,7 @@ class Lookyloo():
# Optional: Cookies sent to server in request -> map to nodes that set the cookie in response # Optional: Cookies sent to server in request -> map to nodes that set the cookie in response
if hasattr(url, 'cookies_sent'): if hasattr(url, 'cookies_sent'):
to_display_sent: Dict[str, Set[Iterable[Optional[str]]]] = defaultdict(set) to_display_sent: dict[str, set[Iterable[str | None]]] = defaultdict(set)
for cookie, contexts in url.cookies_sent.items(): for cookie, contexts in url.cookies_sent.items():
if not contexts: if not contexts:
# Locally created? # Locally created?
@ -1401,7 +1403,7 @@ class Lookyloo():
# Optional: Cookies received from server in response -> map to nodes that send the cookie in request # Optional: Cookies received from server in response -> map to nodes that send the cookie in request
if hasattr(url, 'cookies_received'): if hasattr(url, 'cookies_received'):
to_display_received: Dict[str, Dict[str, Set[Iterable[Optional[str]]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)} to_display_received: dict[str, dict[str, set[Iterable[str | None]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)}
for domain, c_received, is_3rd_party in url.cookies_received: for domain, c_received, is_3rd_party in url.cookies_received:
if c_received not in ct.root_hartree.cookies_sent: if c_received not in ct.root_hartree.cookies_sent:
# This cookie is never sent. # This cookie is never sent.
@ -1421,14 +1423,14 @@ class Lookyloo():
urls.append(to_append) urls.append(to_append)
return hostnode, urls return hostnode, urls
def get_stats(self) -> Dict[str, List]: def get_stats(self) -> dict[str, list[Any]]:
'''Gather statistics about the lookyloo instance''' '''Gather statistics about the lookyloo instance'''
today = date.today() today = date.today()
calendar_week = today.isocalendar()[1] calendar_week = today.isocalendar()[1]
stats_dict = {'submissions': 0, 'redirects': 0} stats_dict = {'submissions': 0, 'redirects': 0}
stats: Dict[int, Dict[int, Dict[str, Any]]] = {} stats: dict[int, dict[int, dict[str, Any]]] = {}
weeks_stats: Dict[int, Dict] = {} weeks_stats: dict[int, dict[str, Any]] = {}
# Only recent captures that are not archived # Only recent captures that are not archived
for cache in self.sorted_capture_cache(): for cache in self.sorted_capture_cache():
@ -1467,7 +1469,7 @@ class Lookyloo():
stats[capture_ts.year][capture_ts.month] = {'submissions': 0} stats[capture_ts.year][capture_ts.month] = {'submissions': 0}
stats[capture_ts.year][capture_ts.month]['submissions'] += 1 stats[capture_ts.year][capture_ts.month]['submissions'] += 1
statistics: Dict[str, List] = {'weeks': [], 'years': []} statistics: dict[str, list[Any]] = {'weeks': [], 'years': []}
for week_number in sorted(weeks_stats.keys()): for week_number in sorted(weeks_stats.keys()):
week_stat = weeks_stats[week_number] week_stat = weeks_stats[week_number]
urls = week_stat.pop('uniq_urls') urls = week_stat.pop('uniq_urls')
@ -1477,7 +1479,7 @@ class Lookyloo():
statistics['weeks'].append(week_stat) statistics['weeks'].append(week_stat)
for year in sorted(stats.keys()): for year in sorted(stats.keys()):
year_stats: Dict[str, Union[int, List]] = {'year': year, 'months': [], 'yearly_submissions': 0} year_stats: dict[str, int | list[Any]] = {'year': year, 'months': [], 'yearly_submissions': 0}
for month in sorted(stats[year].keys()): for month in sorted(stats[year].keys()):
month_stats = stats[year][month] month_stats = stats[year][month]
if len(month_stats) == 1: if len(month_stats) == 1:
@ -1496,15 +1498,15 @@ class Lookyloo():
return statistics return statistics
def store_capture(self, uuid: str, is_public: bool, def store_capture(self, uuid: str, is_public: bool,
os: Optional[str]=None, browser: Optional[str]=None, os: str | None=None, browser: str | None=None,
parent: Optional[str]=None, parent: str | None=None,
downloaded_filename: Optional[str]=None, downloaded_file: Optional[bytes]=None, downloaded_filename: str | None=None, downloaded_file: bytes | None=None,
error: Optional[str]=None, har: Optional[Dict[str, Any]]=None, error: str | None=None, har: dict[str, Any] | None=None,
png: Optional[bytes]=None, html: Optional[str]=None, png: bytes | None=None, html: str | None=None,
last_redirected_url: Optional[str]=None, last_redirected_url: str | None=None,
cookies: Optional[Union[List['Cookie'], List[Dict[str, str]]]]=None, cookies: list[Cookie] | list[dict[str, str]] | None=None,
capture_settings: Optional[CaptureSettings]=None, capture_settings: CaptureSettings | None=None,
potential_favicons: Optional[Set[bytes]]=None potential_favicons: set[bytes] | None=None
) -> None: ) -> None:
now = datetime.now() now = datetime.now()
@ -1512,7 +1514,7 @@ class Lookyloo():
safe_create_dir(dirpath) safe_create_dir(dirpath)
if os or browser: if os or browser:
meta: Dict[str, str] = {} meta: dict[str, str] = {}
if os: if os:
meta['os'] = os meta['os'] = os
if browser: if browser:

View File

@ -14,3 +14,22 @@ from .riskiq import RiskIQ, RiskIQError # noqa
from .urlhaus import URLhaus # noqa from .urlhaus import URLhaus # noqa
from .cloudflare import Cloudflare # noqa from .cloudflare import Cloudflare # noqa
from .circlpdns import CIRCLPDNS # noqa from .circlpdns import CIRCLPDNS # noqa
__all__ = [
'FOX',
'MISPs',
'MISP',
'PhishingInitiative',
'SaneJavaScript',
'UrlScan',
'UniversalWhois',
'VirusTotal',
'Pandora',
'Phishtank',
'Hashlookup',
'RiskIQ',
'RiskIQError',
'URLhaus',
'Cloudflare',
'CIRCLPDNS'
]
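The `__all__` list added here is what mypy's strict mode expects from a package `__init__`: implicit re-export is disabled under strict, so names imported only to be re-exported must either appear in `__all__` or be imported with the redundant `from x import y as y` form. A sketch of the pattern with hypothetical names:

# pkg/__init__.py -- hypothetical package showing explicit re-export
from .engine import Engine  # noqa

__all__ = ['Engine']
# With no_implicit_reexport (part of mypy --strict), the __all__ entry
# keeps `from pkg import Engine` valid for type checking without
# rewriting the import as `from .engine import Engine as Engine`.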

View File

@ -1,12 +1,14 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from datetime import date from datetime import date
from typing import Dict, List, Optional, TYPE_CHECKING from typing import Dict, List, Optional, TYPE_CHECKING
from urllib.parse import urlparse from urllib.parse import urlparse
from pypdns import PyPDNS, PDNSRecord from pypdns import PyPDNS, PDNSRecord # type: ignore[attr-defined]
from ..default import ConfigError, get_homedir from ..default import ConfigError, get_homedir
from ..helpers import get_cache_directory from ..helpers import get_cache_directory
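The import above gets a narrow `# type: ignore[attr-defined]` rather than a bare ignore: the error code documents exactly what is silenced (pypdns ships no type information for these symbols), and `warn_unused_ignores`, implied by strict mode, will flag the comment once it becomes unnecessary. For comparison:

# Narrow, auditable ignore, as used throughout this commit:
from pypdns import PyPDNS, PDNSRecord  # type: ignore[attr-defined]

# Avoided: a bare ignore would also mask unrelated errors that later
# appear on the same line.
# from pypdns import PyPDNS, PDNSRecord  # type: ignore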
@ -32,7 +34,7 @@ class CIRCLPDNS(AbstractModule):
self.storage_dir_pypdns.mkdir(parents=True, exist_ok=True) self.storage_dir_pypdns.mkdir(parents=True, exist_ok=True)
return True return True
def get_passivedns(self, query: str) -> Optional[List[PDNSRecord]]: def get_passivedns(self, query: str) -> list[PDNSRecord] | None:
# The query can be an IP or a hostname. For now, we only do it on domains. # The query can be an IP or a hostname. For now, we only do it on domains.
url_storage_dir = get_cache_directory(self.storage_dir_pypdns, query, 'pdns') url_storage_dir = get_cache_directory(self.storage_dir_pypdns, query, 'pdns')
if not url_storage_dir.exists(): if not url_storage_dir.exists():
@ -44,7 +46,7 @@ class CIRCLPDNS(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return [PDNSRecord(record) for record in json.load(f)] return [PDNSRecord(record) for record in json.load(f)]
def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}

View File

@ -1,6 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from typing import Dict from __future__ import annotations
from typing import Dict, Any
import requests import requests
@ -29,7 +31,7 @@ class FOX(AbstractModule):
return True return True
def capture_default_trigger(self, url: str, /, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, url: str, /, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on the initial URL''' '''Run the module on the initial URL'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}
@ -52,7 +54,7 @@ class FOX(AbstractModule):
response.raise_for_status() response.raise_for_status()
return True return True
def url_submit(self, url: str) -> Dict: def url_submit(self, url: str) -> dict[str, Any]:
'''Submit a URL to FOX '''Submit a URL to FOX
''' '''
if not self.available: if not self.available:

View File

@ -1,10 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from typing import Dict, List from typing import Dict, List
from har2tree import CrawledTree from har2tree import CrawledTree # type: ignore[attr-defined]
from pyhashlookup import Hashlookup from pyhashlookup import Hashlookup # type: ignore[attr-defined]
from ..default import ConfigError from ..default import ConfigError
from ..helpers import get_useragent_for_requests from ..helpers import get_useragent_for_requests
@ -31,7 +33,7 @@ class HashlookupModule(AbstractModule):
self.allow_auto_trigger = bool(self.config.get('allow_auto_trigger', False)) self.allow_auto_trigger = bool(self.config.get('allow_auto_trigger', False))
return True return True
def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}
@ -52,14 +54,14 @@ class HashlookupModule(AbstractModule):
return {'success': 'Module triggered'} return {'success': 'Module triggered'}
def hashes_lookup(self, hashes: List[str]) -> Dict[str, Dict[str, str]]: def hashes_lookup(self, hashes: list[str]) -> dict[str, dict[str, str]]:
'''Look up a list of hashes against Hashlookup '''Look up a list of hashes against Hashlookup
Note: It will trigger a request to hashlookup every time *until* there is a hit, then once a day. Note: It will trigger a request to hashlookup every time *until* there is a hit, then once a day.
''' '''
if not self.available: if not self.available:
raise ConfigError('Hashlookup not available, probably not enabled.') raise ConfigError('Hashlookup not available, probably not enabled.')
to_return: Dict[str, Dict[str, str]] = {} to_return: dict[str, dict[str, str]] = {}
for entry in self.client.sha1_bulk_lookup(hashes): for entry in self.client.sha1_bulk_lookup(hashes):
if 'SHA-1' in entry: if 'SHA-1' in entry:
to_return[entry['SHA-1'].lower()] = entry to_return[entry['SHA-1'].lower()] = entry

View File

@ -5,12 +5,12 @@ import re
from io import BytesIO from io import BytesIO
from collections import defaultdict from collections import defaultdict
from collections.abc import Mapping from collections.abc import Mapping
from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING, Iterator
import requests import requests
from har2tree import HostNode, URLNode, Har2TreeError from har2tree import HostNode, URLNode, Har2TreeError # type: ignore[attr-defined]
from pymisp import MISPAttribute, MISPEvent, PyMISP from pymisp import MISPAttribute, MISPEvent, PyMISP, MISPTag # type: ignore[attr-defined]
from pymisp.tools import FileObject, URLObject from pymisp.tools import FileObject, URLObject # type: ignore[attr-defined]
from ..default import get_config, get_homedir from ..default import get_config, get_homedir
from ..helpers import get_public_suffix_list from ..helpers import get_public_suffix_list
@ -21,7 +21,7 @@ if TYPE_CHECKING:
from ..capturecache import CaptureCache from ..capturecache import CaptureCache
class MISPs(Mapping, AbstractModule): class MISPs(Mapping, AbstractModule): # type: ignore[type-arg]
def module_init(self) -> bool: def module_init(self) -> bool:
if not self.config.get('default'): if not self.config.get('default'):
@ -37,7 +37,7 @@ class MISPs(Mapping, AbstractModule):
self.logger.warning(f"The default MISP instance ({self.default_instance}) is missing in the instances ({', '.join(self.config['instances'].keys())}), disabling MISP.") self.logger.warning(f"The default MISP instance ({self.default_instance}) is missing in the instances ({', '.join(self.config['instances'].keys())}), disabling MISP.")
return False return False
self.__misps: Dict[str, 'MISP'] = {} self.__misps = {}
for instance_name, instance_config in self.config['instances'].items(): for instance_name, instance_config in self.config['instances'].items():
if misp_connector := MISP(config=instance_config): if misp_connector := MISP(config=instance_config):
if misp_connector.available: if misp_connector.available:
@ -56,10 +56,10 @@ class MISPs(Mapping, AbstractModule):
def __getitem__(self, name: str) -> 'MISP': def __getitem__(self, name: str) -> 'MISP':
return self.__misps[name] return self.__misps[name]
def __iter__(self): def __iter__(self) -> Iterator[str]:
return iter(self.__misps) return iter(self.__misps)
def __len__(self): def __len__(self) -> int:
return len(self.__misps) return len(self.__misps)
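`MISPs` fills in the `collections.abc.Mapping` protocol: providing `__getitem__`, `__iter__`, and `__len__` lets the ABC derive `__contains__`, `keys()`, `items()` and the rest. A Mapping iterates over its keys, hence the `Iterator[str]` return type used above. A minimal runnable sketch under those assumptions, with a hypothetical `Registry` class:

from __future__ import annotations

from collections.abc import Iterator, Mapping


class Registry(Mapping):  # type: ignore[type-arg]  # unparameterized for 3.8, as in the diff

    def __init__(self) -> None:
        self._items: dict[str, int] = {'default': 1}

    def __getitem__(self, name: str) -> int:
        return self._items[name]

    def __iter__(self) -> Iterator[str]:  # a Mapping yields its keys
        return iter(self._items)

    def __len__(self) -> int:
        return len(self._items)


r = Registry()
assert 'default' in r                       # __contains__ derived by the ABC
assert list(r.items()) == [('default', 1)]  # items() derived as well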
@property @property
@ -170,10 +170,10 @@ class MISP(AbstractModule):
self.psl = get_public_suffix_list() self.psl = get_public_suffix_list()
return True return True
def get_fav_tags(self): def get_fav_tags(self) -> dict[Any, Any] | list[MISPTag]:
return self.client.tags(pythonify=True, favouritesOnly=1) return self.client.tags(pythonify=True, favouritesOnly=1)
def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=False) -> Union[List[MISPEvent], Dict]: def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=False) -> Union[List[MISPEvent], Dict[str, str]]:
'''Adds the pre-configured information as required by the instance. '''Adds the pre-configured information as required by the instance.
If duplicates aren't allowed, they will be automatically skipped and the If duplicates aren't allowed, they will be automatically skipped and the
extends_uuid key in the next element in the list updated''' extends_uuid key in the next element in the list updated'''
@ -196,11 +196,11 @@ class MISP(AbstractModule):
for tag in self.default_tags: for tag in self.default_tags:
event.add_tag(tag) event.add_tag(tag)
if auto_publish: if auto_publish:
event.publish() event.publish() # type: ignore[no-untyped-call]
events_to_push.append(event) events_to_push.append(event)
return events_to_push return events_to_push
def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=None) -> Union[List[MISPEvent], Dict]: def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=None) -> Union[List[MISPEvent], Dict[Any, Any]]:
if auto_publish is None: if auto_publish is None:
auto_publish = self.auto_publish auto_publish = self.auto_publish
if self.available and self.enable_push: if self.available and self.enable_push:

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from io import BytesIO from __future__ import annotations
from typing import Dict
from pypandora import PyPandora from io import BytesIO
from typing import Dict, Any
from pypandora import PyPandora # type: ignore[attr-defined]
from ..default import ConfigError from ..default import ConfigError
from ..helpers import get_useragent_for_requests from ..helpers import get_useragent_for_requests
@ -27,7 +29,7 @@ class Pandora(AbstractModule):
return True return True
def capture_default_trigger(self, file_in_memory: BytesIO, filename: str, /, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, file_in_memory: BytesIO, filename: str, /, auto_trigger: bool=False) -> dict[str, str]:
'''Automatically submit the file if the landing URL is a file instead of a webpage''' '''Automatically submit the file if the landing URL is a file instead of a webpage'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}
@ -39,7 +41,7 @@ class Pandora(AbstractModule):
self.submit_file(file_in_memory, filename) self.submit_file(file_in_memory, filename)
return {'success': 'Module triggered'} return {'success': 'Module triggered'}
def submit_file(self, file_in_memory: BytesIO, filename: str) -> Dict: def submit_file(self, file_in_memory: BytesIO, filename: str) -> dict[str, Any]:
'''Submit a file to Pandora''' '''Submit a file to Pandora'''
if not self.available: if not self.available:
raise ConfigError('Pandora not available, probably not able to reach the server.') raise ConfigError('Pandora not available, probably not able to reach the server.')

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from datetime import date, datetime, timedelta, timezone from datetime import date, datetime, timedelta, timezone
from typing import Any, Dict, Optional, List, TYPE_CHECKING from typing import Any, Dict, Optional, List, TYPE_CHECKING
from pyphishtanklookup import PhishtankLookup from pyphishtanklookup import PhishtankLookup # type: ignore[attr-defined]
from ..default import ConfigError, get_homedir from ..default import ConfigError, get_homedir
from ..helpers import get_cache_directory from ..helpers import get_cache_directory
@ -38,7 +40,7 @@ class Phishtank(AbstractModule):
self.storage_dir_pt.mkdir(parents=True, exist_ok=True) self.storage_dir_pt.mkdir(parents=True, exist_ok=True)
return True return True
def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]: def get_url_lookup(self, url: str) -> dict[str, Any] | None:
url_storage_dir = get_cache_directory(self.storage_dir_pt, url, 'url') url_storage_dir = get_cache_directory(self.storage_dir_pt, url, 'url')
if not url_storage_dir.exists(): if not url_storage_dir.exists():
return None return None
@ -49,10 +51,10 @@ class Phishtank(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def lookup_ips_capture(self, cache: 'CaptureCache') -> Dict[str, List[Dict[str, Any]]]: def lookup_ips_capture(self, cache: CaptureCache) -> dict[str, list[dict[str, Any]]]:
with (cache.capture_dir / 'ips.json').open() as f: with (cache.capture_dir / 'ips.json').open() as f:
ips_dump = json.load(f) ips_dump = json.load(f)
to_return: Dict[str, List[Dict[str, Any]]] = {} to_return: dict[str, list[dict[str, Any]]] = {}
for ip in {ip for ips_list in ips_dump.values() for ip in ips_list}: for ip in {ip for ips_list in ips_dump.values() for ip in ips_list}:
entry = self.get_ip_lookup(ip) entry = self.get_ip_lookup(ip)
if not entry: if not entry:
@ -64,7 +66,7 @@ class Phishtank(AbstractModule):
to_return[ip].append(entry) to_return[ip].append(entry)
return to_return return to_return
def get_ip_lookup(self, ip: str) -> Optional[Dict[str, Any]]: def get_ip_lookup(self, ip: str) -> dict[str, Any] | None:
ip_storage_dir = get_cache_directory(self.storage_dir_pt, ip, 'ip') ip_storage_dir = get_cache_directory(self.storage_dir_pt, ip, 'ip')
if not ip_storage_dir.exists(): if not ip_storage_dir.exists():
return None return None
@ -75,7 +77,7 @@ class Phishtank(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def capture_default_trigger(self, cache: 'CaptureCache', /, *, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, cache: CaptureCache, /, *, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}

View File

@ -1,12 +1,14 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
import time import time
from datetime import date from datetime import date
from typing import Any, Dict, Optional, TYPE_CHECKING from typing import Any, Dict, Optional, TYPE_CHECKING
from pyeupi import PyEUPI from pyeupi import PyEUPI # type: ignore[attr-defined]
from ..default import ConfigError, get_homedir from ..default import ConfigError, get_homedir
from ..helpers import get_cache_directory from ..helpers import get_cache_directory
@ -34,7 +36,7 @@ class PhishingInitiative(AbstractModule):
self.storage_dir_eupi.mkdir(parents=True, exist_ok=True) self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)
return True return True
def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]: def get_url_lookup(self, url: str) -> dict[str, Any] | None:
url_storage_dir = get_cache_directory(self.storage_dir_eupi, url) url_storage_dir = get_cache_directory(self.storage_dir_eupi, url)
if not url_storage_dir.exists(): if not url_storage_dir.exists():
return None return None
@ -45,7 +47,7 @@ class PhishingInitiative(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from datetime import date, datetime, timedelta from datetime import date, datetime, timedelta
@ -56,7 +58,7 @@ class RiskIQ(AbstractModule):
self.storage_dir_riskiq.mkdir(parents=True, exist_ok=True) self.storage_dir_riskiq.mkdir(parents=True, exist_ok=True)
return True return True
def get_passivedns(self, query: str) -> Optional[Dict[str, Any]]: def get_passivedns(self, query: str) -> dict[str, Any] | None:
# The query can be an IP or a hostname. For now, we only do it on domains. # The query can be an IP or a hostname. For now, we only do it on domains.
url_storage_dir = get_cache_directory(self.storage_dir_riskiq, query, 'pdns') url_storage_dir = get_cache_directory(self.storage_dir_riskiq, query, 'pdns')
if not url_storage_dir.exists(): if not url_storage_dir.exists():
@ -68,7 +70,7 @@ class RiskIQ(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}
@ -88,7 +90,7 @@ class RiskIQ(AbstractModule):
self.pdns_lookup(hostname, force) self.pdns_lookup(hostname, force)
return {'success': 'Module triggered'} return {'success': 'Module triggered'}
def pdns_lookup(self, hostname: str, force: bool=False, first_seen: Optional[Union[date, datetime]]=None) -> None: def pdns_lookup(self, hostname: str, force: bool=False, first_seen: date | datetime | None=None) -> None:
'''Look up a hostname on RiskIQ Passive DNS '''Look up a hostname on RiskIQ Passive DNS
Note: force means re-fetch the entry from RiskIQ even if we already did it today Note: force means re-fetch the entry from RiskIQ even if we already did it today
''' '''

View File

@ -1,10 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from datetime import date from datetime import date
from typing import Dict, Iterable, List, Union from typing import Dict, Iterable, List, Union
from pysanejs import SaneJS from pysanejs import SaneJS # type: ignore[attr-defined]
from ..default import get_homedir from ..default import get_homedir
@ -29,7 +31,7 @@ class SaneJavaScript(AbstractModule):
self.storage_dir.mkdir(parents=True, exist_ok=True) self.storage_dir.mkdir(parents=True, exist_ok=True)
return True return True
def hashes_lookup(self, sha512: Union[Iterable[str], str], force: bool=False) -> Dict[str, List[str]]: def hashes_lookup(self, sha512: Iterable[str] | str, force: bool=False) -> dict[str, list[str]]:
if isinstance(sha512, str): if isinstance(sha512, str):
hashes: Iterable[str] = [sha512] hashes: Iterable[str] = [sha512]
else: else:
@ -43,7 +45,7 @@ class SaneJavaScript(AbstractModule):
with sanejs_unknowns.open() as f: with sanejs_unknowns.open() as f:
unknown_hashes = {line.strip() for line in f.readlines()} unknown_hashes = {line.strip() for line in f.readlines()}
to_return: Dict[str, List[str]] = {} to_return: dict[str, list[str]] = {}
if force: if force:
to_lookup = hashes to_lookup = hashes

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from datetime import date from datetime import date
from typing import Any, Dict, Optional, TYPE_CHECKING from typing import Any, Dict, Optional, TYPE_CHECKING
@ -29,7 +31,7 @@ class URLhaus(AbstractModule):
self.storage_dir_uh.mkdir(parents=True, exist_ok=True) self.storage_dir_uh.mkdir(parents=True, exist_ok=True)
return True return True
def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]: def get_url_lookup(self, url: str) -> dict[str, Any] | None:
url_storage_dir = get_cache_directory(self.storage_dir_uh, url, 'url') url_storage_dir = get_cache_directory(self.storage_dir_uh, url, 'url')
if not url_storage_dir.exists(): if not url_storage_dir.exists():
return None return None
@ -40,13 +42,13 @@ class URLhaus(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def __url_result(self, url: str) -> Dict: def __url_result(self, url: str) -> dict[str, Any]:
data = {'url': url} data = {'url': url}
response = requests.post(f'{self.url}/url/', data) response = requests.post(f'{self.url}/url/', data)
response.raise_for_status() response.raise_for_status()
return response.json() return response.json()
def capture_default_trigger(self, cache: 'CaptureCache', /, *, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, cache: CaptureCache, /, *, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
from datetime import date from datetime import date
from typing import Any, Dict, Optional, TYPE_CHECKING from typing import Any, Dict, Optional, TYPE_CHECKING
@ -47,7 +49,7 @@ class UrlScan(AbstractModule):
self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True) self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True)
return True return True
def get_url_submission(self, capture_info: 'CaptureCache') -> Dict[str, Any]: def get_url_submission(self, capture_info: CaptureCache) -> dict[str, Any]:
url_storage_dir = get_cache_directory( url_storage_dir = get_cache_directory(
self.storage_dir_urlscan, self.storage_dir_urlscan,
f'{capture_info.url}{capture_info.user_agent}{capture_info.referer}', f'{capture_info.url}{capture_info.user_agent}{capture_info.referer}',
@ -61,7 +63,7 @@ class UrlScan(AbstractModule):
with cached_entries[0].open() as f: with cached_entries[0].open() as f:
return json.load(f) return json.load(f)
def capture_default_trigger(self, capture_info: 'CaptureCache', /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, capture_info: CaptureCache, /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on the initial URL''' '''Run the module on the initial URL'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}
@ -75,7 +77,7 @@ class UrlScan(AbstractModule):
self.url_submit(capture_info, visibility, force) self.url_submit(capture_info, visibility, force)
return {'success': 'Module triggered'} return {'success': 'Module triggered'}
def __submit_url(self, url: str, useragent: Optional[str], referer: Optional[str], visibility: str) -> Dict: def __submit_url(self, url: str, useragent: str | None, referer: str | None, visibility: str) -> dict[str, Any]:
data = {'customagent': useragent if useragent else '', 'referer': referer if referer else ''} data = {'customagent': useragent if useragent else '', 'referer': referer if referer else ''}
if not url.startswith('http'): if not url.startswith('http'):
@ -96,12 +98,12 @@ class UrlScan(AbstractModule):
response.raise_for_status() response.raise_for_status()
return response.json() return response.json()
def __url_result(self, uuid: str) -> Dict: def __url_result(self, uuid: str) -> dict[str, Any]:
response = self.client.get(f'https://urlscan.io/api/v1/result/{uuid}') response = self.client.get(f'https://urlscan.io/api/v1/result/{uuid}')
response.raise_for_status() response.raise_for_status()
return response.json() return response.json()
def url_submit(self, capture_info: 'CaptureCache', visibility: str, force: bool=False) -> Dict: def url_submit(self, capture_info: CaptureCache, visibility: str, force: bool=False) -> dict[str, Any]:
'''Look up a URL on urlscan.io '''Look up a URL on urlscan.io
Note: force means 2 things: Note: force means 2 things:
* (re)scan of the URL * (re)scan of the URL
@ -142,7 +144,7 @@ class UrlScan(AbstractModule):
return response return response
return {'error': 'Submitting is not allowed by the configuration'} return {'error': 'Submitting is not allowed by the configuration'}
def url_result(self, capture_info: 'CaptureCache'): def url_result(self, capture_info: CaptureCache) -> dict[str, Any]:
'''Get the result from a submission.''' '''Get the result from a submission.'''
submission = self.get_url_submission(capture_info) submission = self.get_url_submission(capture_info)
if submission and 'uuid' in submission: if submission and 'uuid' in submission:

View File

@ -1,11 +1,13 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import re import re
import socket import socket
from typing import overload, Literal, List, Union from typing import overload, Literal, List, Union
from har2tree import CrawledTree, Har2TreeError, HostNode from har2tree import CrawledTree, Har2TreeError, HostNode # type: ignore[attr-defined]
from .abstractmodule import AbstractModule from .abstractmodule import AbstractModule
@ -62,7 +64,7 @@ class UniversalWhois(AbstractModule):
self.query_whois_hostnode(n) self.query_whois_hostnode(n)
@overload @overload
def whois(self, query: str, contact_email_only: Literal[True]) -> List[str]: def whois(self, query: str, contact_email_only: Literal[True]) -> list[str]:
... ...
@overload @overload
@ -70,10 +72,10 @@ class UniversalWhois(AbstractModule):
... ...
@overload @overload
def whois(self, query: str, contact_email_only: bool=False) -> Union[str, List[str]]: def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
... ...
def whois(self, query: str, contact_email_only: bool=False) -> Union[str, List[str]]: def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
if not self.available: if not self.available:
return '' return ''
bytes_whois = b'' bytes_whois = b''
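The `whois` overloads above let one runtime implementation advertise different return types: `contact_email_only=True` promises a `list[str]` of contact addresses, the default promises the raw record as `str`. A condensed sketch of the `@overload` + `Literal` pattern, as a hypothetical free function rather than the module's actual method:

from __future__ import annotations

from typing import Literal, overload


@overload
def whois(query: str, contact_email_only: Literal[True]) -> list[str]: ...
@overload
def whois(query: str, contact_email_only: Literal[False] = False) -> str: ...
def whois(query: str, contact_email_only: bool = False) -> str | list[str]:
    # Single runtime body; the overloads above exist only for the checker.
    record = f'whois record for {query}'
    return ['abuse@example.com'] if contact_email_only else record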

View File

@ -1,5 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import json import json
import time import time
from datetime import date from datetime import date
@ -18,9 +20,10 @@ if TYPE_CHECKING:
from .abstractmodule import AbstractModule from .abstractmodule import AbstractModule
def jsonify_vt(obj: WhistleBlowerDict): def jsonify_vt(obj: WhistleBlowerDict) -> dict[str, Any]:
if isinstance(obj, WhistleBlowerDict): if isinstance(obj, WhistleBlowerDict):
return {k: v for k, v in obj.items()} return {k: v for k, v in obj.items()}
return obj
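The added `return obj` gives `jsonify_vt` a value on every code path, consistent with a `json.dumps(default=...)` serialization hook, which is presumably how it is used (an assumption; the diff does not show the call site). The documented contract of such a hook is to convert the types it knows and raise `TypeError` for the rest:

import json
from typing import Any


def to_serializable(obj: Any) -> Any:
    # Hypothetical hook; `set` stands in for WhistleBlowerDict here.
    if isinstance(obj, set):
        return sorted(obj)
    raise TypeError(f'{type(obj).__name__} is not JSON serializable')


print(json.dumps({'tags': {'b', 'a'}}, default=to_serializable))  # {"tags": ["a", "b"]}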
class VirusTotal(AbstractModule): class VirusTotal(AbstractModule):
@ -39,7 +42,7 @@ class VirusTotal(AbstractModule):
self.storage_dir_vt.mkdir(parents=True, exist_ok=True) self.storage_dir_vt.mkdir(parents=True, exist_ok=True)
return True return True
def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]: def get_url_lookup(self, url: str) -> dict[str, Any] | None:
url_storage_dir = get_cache_directory(self.storage_dir_vt, vt.url_id(url)) url_storage_dir = get_cache_directory(self.storage_dir_vt, vt.url_id(url))
if not url_storage_dir.exists(): if not url_storage_dir.exists():
return None return None
@ -54,7 +57,7 @@ class VirusTotal(AbstractModule):
cached_entries[0].unlink(missing_ok=True) cached_entries[0].unlink(missing_ok=True)
return None return None
def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict: def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
'''Run the module on all the nodes up to the final redirect''' '''Run the module on all the nodes up to the final redirect'''
if not self.available: if not self.available:
return {'error': 'Module not available'} return {'error': 'Module not available'}

8
mypy.ini Normal file
View File

@ -0,0 +1,8 @@
[mypy]
strict = True
warn_return_any = False
show_error_context = True
pretty = True
[mypy-docs.source.*]
ignore_errors = True
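This new `mypy.ini` supersedes the per-flag configuration removed from `pyproject.toml` below: `strict = True` bundles `disallow_untyped_defs`, `no_implicit_optional`, `warn_unused_ignores`, `no_implicit_reexport` and more; `warn_return_any = False` relaxes exactly one of those bundled flags; and the `[mypy-docs.source.*]` override exempts the Sphinx sources. A two-line illustration of what strict adds:

# Accepted by a default `mypy` run; only `typed` passes `mypy --strict`.

def loose(x):  # --strict: error: Function is missing a type annotation
    return x + 1


def typed(x: int) -> int:
    return x + 1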

18
poetry.lock generated
View File

@ -1447,18 +1447,18 @@ referencing = ">=0.31.0"
[[package]] [[package]]
name = "lacuscore" name = "lacuscore"
version = "1.7.8" version = "1.7.9"
description = "Core of Lacus, usable as a module" description = "Core of Lacus, usable as a module"
optional = false optional = false
python-versions = ">=3.8,<4.0" python-versions = ">=3.8,<4.0"
files = [ files = [
{file = "lacuscore-1.7.8-py3-none-any.whl", hash = "sha256:b877567a7efb35802c5fb6a01a8b88602978c16b49ee0ceead937337c6710081"}, {file = "lacuscore-1.7.9-py3-none-any.whl", hash = "sha256:74309aa4216fabffadd4ab724f8f2273d12e59dedd8e826e2710847d92497f8c"},
{file = "lacuscore-1.7.8.tar.gz", hash = "sha256:e0aa938a6555c8fe8485777e04c2ca549cd3b1fd7a75e7839d49a3fef1499252"}, {file = "lacuscore-1.7.9.tar.gz", hash = "sha256:cb0df82d88ffe805fc78c60e535ee54d82842b763a84ad97cfc2a5a99d4c3ed7"},
] ]
[package.dependencies] [package.dependencies]
defang = ">=0.5.3,<0.6.0" defang = ">=0.5.3,<0.6.0"
playwrightcapture = {version = ">=1.22.5,<2.0.0", extras = ["recaptcha"]} playwrightcapture = {version = ">=1.22.6,<2.0.0", extras = ["recaptcha"]}
redis = {version = ">=5.0.1,<6.0.0", extras = ["hiredis"]} redis = {version = ">=5.0.1,<6.0.0", extras = ["hiredis"]}
requests = ">=2.31.0,<3.0.0" requests = ">=2.31.0,<3.0.0"
ua-parser = ">=0.18.0,<0.19.0" ua-parser = ">=0.18.0,<0.19.0"
@ -2154,13 +2154,13 @@ test = ["pytest"]
[[package]] [[package]]
name = "playwrightcapture" name = "playwrightcapture"
version = "1.22.5" version = "1.22.6"
description = "A simple library to capture websites using playwright" description = "A simple library to capture websites using playwright"
optional = false optional = false
python-versions = ">=3.8,<4.0" python-versions = ">=3.8,<4.0"
files = [ files = [
{file = "playwrightcapture-1.22.5-py3-none-any.whl", hash = "sha256:023d394efe2c6173178ac7a9143a9b77400704b965280c494e9bb418eaa2ea86"}, {file = "playwrightcapture-1.22.6-py3-none-any.whl", hash = "sha256:910ad4dabbc51864f1c8fed6e62c2869a519211bcf7ae6e9c5aac3ea29268e33"},
{file = "playwrightcapture-1.22.5.tar.gz", hash = "sha256:8fac3bf723536ebc6ff0e1908aa838029a8b6e8ed1998fd162d5557d1d3fb2ec"}, {file = "playwrightcapture-1.22.6.tar.gz", hash = "sha256:b5c377585aba9ff71f055127b6be86458503ff3308e8fc8225dd4c05ab9597ae"},
] ]
[package.dependencies] [package.dependencies]
@ -2173,7 +2173,7 @@ pytz = {version = ">=2023.3.post1,<2024.0", markers = "python_version < \"3.9\""
requests = {version = ">=2.31.0,<3.0.0", extras = ["socks"], optional = true, markers = "extra == \"recaptcha\""} requests = {version = ">=2.31.0,<3.0.0", extras = ["socks"], optional = true, markers = "extra == \"recaptcha\""}
setuptools = ">=69.0.3,<70.0.0" setuptools = ">=69.0.3,<70.0.0"
SpeechRecognition = {version = ">=3.10.1,<4.0.0", optional = true, markers = "extra == \"recaptcha\""} SpeechRecognition = {version = ">=3.10.1,<4.0.0", optional = true, markers = "extra == \"recaptcha\""}
tzdata = ">=2023.3,<2024.0" tzdata = ">=2023.4,<2024.0"
w3lib = ">=2.1.2,<3.0.0" w3lib = ">=2.1.2,<3.0.0"
[package.extras] [package.extras]
@ -3592,4 +3592,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<3.12" python-versions = ">=3.8.1,<3.12"
content-hash = "9e6afc44fccf8789e1968b698fc9a6632bfb7fb5d053a404356000386d1fd3ad" content-hash = "95ea92c4f809ea280840866efc4385f75bbb4c7ace7cb9ac4979c17df722fd02"

View File

@ -65,7 +65,7 @@ passivetotal = "^2.5.9"
werkzeug = "^3.0.1" werkzeug = "^3.0.1"
filetype = "^1.2.0" filetype = "^1.2.0"
pypandora = "^1.6.1" pypandora = "^1.6.1"
lacuscore = "^1.7.8" lacuscore = "^1.7.9"
pylacus = "^1.7.1" pylacus = "^1.7.1"
pyipasnhistory = "^2.1.2" pyipasnhistory = "^2.1.2"
publicsuffixlist = "^0.10.0.20231214" publicsuffixlist = "^0.10.0.20231214"
@ -103,17 +103,3 @@ types-pytz = "^2023.3.1.1"
[build-system] [build-system]
requires = ["poetry_core"] requires = ["poetry_core"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.mypy]
check_untyped_defs = true
ignore_errors = false
ignore_missing_imports = false
strict_optional = true
no_implicit_optional = true
warn_unused_ignores = true
warn_redundant_casts = true
warn_unused_configs = true
warn_unreachable = true
show_error_context = true
pretty = true

View File

@ -9,7 +9,7 @@ from lookyloo.default import safe_create_dir, get_socket_path
from lookyloo.helpers import get_captures_dir from lookyloo.helpers import get_captures_dir
def rename_captures(): def rename_captures() -> None:
r = Redis(unix_socket_path=get_socket_path('cache')) r = Redis(unix_socket_path=get_socket_path('cache'))
capture_dir: Path = get_captures_dir() capture_dir: Path = get_captures_dir()
for uuid_path in capture_dir.glob('*/uuid'): for uuid_path in capture_dir.glob('*/uuid'):

View File

@ -9,7 +9,7 @@ import s3fs # type: ignore
from lookyloo.default import get_config from lookyloo.default import get_config
def check_path(path: str): def check_path(path: str) -> dict[str, str]:
s3fs_config = get_config('generic', 's3fs') s3fs_config = get_config('generic', 's3fs')
s3fs_client = s3fs.S3FileSystem(key=s3fs_config['config']['key'], s3fs_client = s3fs.S3FileSystem(key=s3fs_config['config']['key'],
secret=s3fs_config['config']['secret'], secret=s3fs_config['config']['secret'],

View File

@ -4,14 +4,14 @@ import base64
import hashlib import hashlib
import json import json
from typing import Dict from typing import Dict, Any
from lookyloo.default import get_homedir from lookyloo.default import get_homedir
if __name__ == '__main__': if __name__ == '__main__':
dest_dir = get_homedir() / 'website' / 'web' dest_dir = get_homedir() / 'website' / 'web'
to_save: Dict = {'static': {}} to_save: Dict[str, Any] = {'static': {}}
for resource in (dest_dir / 'static').glob('*'): for resource in (dest_dir / 'static').glob('*'):
if resource.name[0] == '.': if resource.name[0] == '.':

View File

@ -73,7 +73,7 @@ def ua_parser(html_content: str) -> Dict[str, Any]:
return to_store return to_store
def main(): def main() -> None:
to_parse = Path('Most Common User Agents - Tech Blog (wh).html') to_parse = Path('Most Common User Agents - Tech Blog (wh).html')
today = datetime.now() today = datetime.now()

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import os import os
import sys import sys
from typing import List, Tuple from typing import List, Tuple, Any
from redis import Redis from redis import Redis
from redis.exceptions import ConnectionError from redis.exceptions import ConnectionError
@ -21,11 +23,11 @@ console = Console(color_system="256")
class Monitoring(): class Monitoring():
def __init__(self) -> None: def __init__(self) -> None:
self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True) self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True) # type: ignore[type-arg]
self.redis_indexing: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True) self.redis_indexing: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True) # type: ignore[type-arg]
@property @property
def backend_status(self): def backend_status(self) -> bool:
socket_path_cache = get_socket_path('cache') socket_path_cache = get_socket_path('cache')
socket_path_index = get_socket_path('indexing') socket_path_index = get_socket_path('indexing')
backend_up = True backend_up = True
@ -56,12 +58,12 @@ class Monitoring():
return backend_up return backend_up
@property @property
def queues(self): def queues(self) -> list[tuple[str, float]]:
return self.redis_cache.zrevrangebyscore('queues', 'Inf', '-Inf', withscores=True) return self.redis_cache.zrevrangebyscore('queues', 'Inf', '-Inf', withscores=True)
@property @property
def ongoing_captures(self): def ongoing_captures(self) -> list[tuple[str, float, dict[str, Any]]]:
captures_uuid: List[Tuple[str, float]] = self.redis_cache.zrevrangebyscore('to_capture', 'Inf', '-Inf', withscores=True) captures_uuid: list[tuple[str, float]] = self.redis_cache.zrevrangebyscore('to_capture', 'Inf', '-Inf', withscores=True)
if not captures_uuid: if not captures_uuid:
return [] return []
to_return = [] to_return = []
@ -75,7 +77,7 @@ class Monitoring():
return to_return return to_return
@property @property
def tree_cache(self): def tree_cache(self) -> dict[str, str]:
to_return = {} to_return = {}
for pid_name, value in self.redis_cache.hgetall('tree_cache').items(): for pid_name, value in self.redis_cache.hgetall('tree_cache').items():
pid, name = pid_name.split('|', 1) pid, name = pid_name.split('|', 1)

View File

@ -4,14 +4,14 @@ import csv
import argparse import argparse
import logging import logging
from lookyloo.lookyloo import Indexing, Lookyloo from lookyloo import Indexing, Lookyloo
from lookyloo.helpers import get_captures_dir from lookyloo.helpers import get_captures_dir
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO) level=logging.INFO)
def main(): def main() -> None:
parser = argparse.ArgumentParser(description='Rebuild the redis cache.') parser = argparse.ArgumentParser(description='Rebuild the redis cache.')
parser.add_argument('--rebuild_pickles', default=False, action='store_true', help='Delete and rebuild the pickles. Count 20s/pickle, it can take a very long time.') parser.add_argument('--rebuild_pickles', default=False, action='store_true', help='Delete and rebuild the pickles. Count 20s/pickle, it can take a very long time.')
args = parser.parse_args() args = parser.parse_args()
@ -30,7 +30,7 @@ def main():
with index.open('r') as _f: with index.open('r') as _f:
recent_uuids = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()} recent_uuids = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
if recent_uuids: if recent_uuids:
lookyloo.redis.hset('lookup_dirs', mapping=recent_uuids) lookyloo.redis.hset('lookup_dirs', mapping=recent_uuids) # type: ignore[arg-type]
# This call will rebuild all the caches as needed. # This call will rebuild all the caches as needed.
lookyloo.sorted_capture_cache() lookyloo.sorted_capture_cache()

View File

@@ -1,8 +1,8 @@
-from lookyloo.lookyloo import Lookyloo
+from lookyloo import Lookyloo

import calendar
import datetime
from urllib.parse import urlparse
-from typing import Dict, Any, Union, Set
+from typing import Dict, Any, Union, Set, List

lookyloo = Lookyloo()
@@ -15,11 +15,12 @@ weeks_stats: Dict[int, Dict[str, Union[int, Set[str]]]] = \
    calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}}

-def uniq_domains(uniq_urls):
+def uniq_domains(uniq_urls: List[str]) -> Set[str]:
    domains = set()
    for url in uniq_urls:
        splitted = urlparse(url)
-        domains.add(splitted.hostname)
+        if splitted.hostname:
+            domains.add(splitted.hostname)
    return domains
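The new guard matters because urlparse() returns hostname=None whenever the input has no network location, and domains.add(None) would silently pollute the set of domains. A quick illustration:

    from urllib.parse import urlparse

    print(urlparse('https://www.example.com/page').hostname)  # www.example.com
    print(urlparse('not-a-url').hostname)                     # None
    print(urlparse('mailto:someone@example.com').hostname)    # None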
@@ -50,8 +51,8 @@ for week_number, week_stat in weeks_stats.items():
    print(' Number of analysis with redirects:', week_stat['analysis_with_redirects'])
    print(' Number of redirects:', week_stat['redirects'])
    print(' Number of unique URLs:', len(week_stat['uniq_urls']))  # type: ignore
-    domains = uniq_domains(week_stat['uniq_urls'])
-    print(' Number of unique domains:', len(domains))
+    d = uniq_domains(week_stat['uniq_urls'])  # type: ignore[arg-type]
+    print(' Number of unique domains:', len(d))

for year, data in stats.items():

View File

@@ -7,7 +7,7 @@ import argparse
from lookyloo.default import get_homedir

-def validate_generic_config_file():
+def validate_generic_config_file() -> bool:
    sample_config = get_homedir() / 'config' / 'generic.json.sample'
    with sample_config.open() as f:
        generic_config_sample = json.load(f)
@@ -53,7 +53,7 @@ def validate_generic_config_file():
    return True

-def validate_modules_config_file():
+def validate_modules_config_file() -> bool:
    with (get_homedir() / 'config' / 'modules.json').open() as f:
        modules_config = json.load(f)
    with (get_homedir() / 'config' / 'modules.json.sample').open() as f:
@@ -69,7 +69,7 @@ def validate_modules_config_file():
    return True

-def update_user_configs():
+def update_user_configs() -> bool:
    for file_name in ['generic', 'modules']:
        with (get_homedir() / 'config' / f'{file_name}.json').open() as f:
            try:
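All three validators follow the same pattern: load the shipped .sample file, load the user copy, and compare the key sets. A minimal sketch of that pattern (the helper name and paths are hypothetical, not the repository's actual code):

    from __future__ import annotations

    import json
    from pathlib import Path

    def missing_keys(user_file: Path, sample_file: Path) -> set[str]:  # hypothetical helper
        with user_file.open() as f:
            user_config = json.load(f)
        with sample_file.open() as f:
            sample_config = json.load(f)
        # keys present in the sample but absent from the user config
        return set(sample_config) - set(user_config)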

View File

@@ -1,5 +1,7 @@
#!/usr/bin/env python3

+from __future__ import annotations
+
import base64
import calendar
import functools
@@ -22,14 +24,15 @@ from uuid import uuid4
from zipfile import ZipFile

import flask_login  # type: ignore
-from flask import (Flask, Response, flash, jsonify, redirect, render_template,
+from flask import (Flask, Response, Request, flash, jsonify, redirect, render_template,
                   request, send_file, url_for)
from flask_bootstrap import Bootstrap5  # type: ignore
from flask_cors import CORS  # type: ignore
from flask_restx import Api  # type: ignore
from lacuscore import CaptureStatus
-from pymisp import MISPEvent, MISPServerError
+from pymisp import MISPEvent, MISPServerError  # type: ignore[attr-defined]
from werkzeug.security import check_password_hash
+from werkzeug.wrappers.response import Response as WerkzeugResponse

from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile
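Everything below hinges on that first added line: with `from __future__ import annotations` (PEP 563), annotations are stored as strings instead of being evaluated at import time, so PEP 604 unions like `User | None` and builtin generics like `dict[str, Any]` are legal in annotations even on Python 3.8/3.9, where evaluating them would fail. A minimal sketch:

    from __future__ import annotations  # must come before the other imports


    def lookup(uuid: str) -> dict[str, str] | None:
        # On Python 3.8 this return annotation would raise a TypeError if it
        # were evaluated; as a postponed (string) annotation it is fine.
        return None


    print(lookup.__annotations__['return'])  # prints: dict[str, str] | None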
@@ -71,8 +74,8 @@ login_manager.init_app(app)
user_agents = UserAgents()

-@login_manager.user_loader
-def user_loader(username):
+@login_manager.user_loader  # type: ignore[misc]
+def user_loader(username: str) -> User | None:
    if username not in build_users_table():
        return None
    user = User()
@@ -80,13 +83,13 @@ def user_loader(username):
    return user

-@login_manager.request_loader
-def _load_user_from_request(request):
+@login_manager.request_loader  # type: ignore[misc]
+def _load_user_from_request(request: Request) -> User | None:
    return load_user_from_request(request)

@app.route('/login', methods=['GET', 'POST'])
-def login():
+def login() -> WerkzeugResponse | str | Response:
    if request.method == 'GET':
        return '''
               <form action='login' method='POST'>
@@ -110,8 +113,8 @@ def login():
@app.route('/logout')
-@flask_login.login_required
-def logout():
+@flask_login.login_required  # type: ignore[misc]
+def logout() -> WerkzeugResponse:
    flask_login.logout_user()
    flash('Successfully logged out.', 'success')
    return redirect(url_for('index'))
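The recurring `# type: ignore[misc]` on these decorators is deliberately narrow: flask_login ships without type information (hence the `# type: ignore` on its import), and when mypy runs with disallow_untyped_decorators enabled, wrapping a freshly annotated view in an untyped decorator would otherwise discard the annotations. A sketch of the failure mode, assuming that mypy option is on:

    import flask_login  # type: ignore

    # Without the narrow ignore, mypy reports:
    #   Untyped decorator makes function "logout" untyped  [misc]
    # The code in brackets is exactly what the comment suppresses.
    @flask_login.login_required  # type: ignore[misc]
    def logout() -> None:  # return type simplified for this sketch
        ...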
@@ -141,7 +144,7 @@ hide_captures_with_error = get_config('generic', 'hide_captures_with_error')
# Method to make sizes in bytes human readable
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
-def sizeof_fmt(num, suffix='B'):
+def sizeof_fmt(num: float, suffix: str='B') -> str:
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return f"{num:3.1f}{unit}{suffix}"
@@ -152,7 +155,7 @@ def sizeof_fmt(num, suffix='B'):
app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)

-def http_status_description(code: int):
+def http_status_description(code: int) -> str:
    if code in http.client.responses:
        return http.client.responses[code]
    return f'Invalid code: {code}'
@@ -161,7 +164,7 @@ def http_status_description(code: int):
app.jinja_env.globals.update(http_status_description=http_status_description)

-def month_name(month: int):
+def month_name(month: int) -> str:
    return calendar.month_name[month]
@@ -181,8 +184,8 @@ class Icon(TypedDict):
    tooltip: str

-def get_icon(icon_id: str) -> Optional[Icon]:
-    available_icons: Dict[str, Icon] = {
+def get_icon(icon_id: str) -> Icon | None:
+    available_icons: dict[str, Icon] = {
        'js': {'icon': "javascript.png", 'tooltip': 'The content of the response is a javascript'},
        'exe': {'icon': "exe.png", 'tooltip': 'The content of the response is an executable'},
        'css': {'icon': "css.png", 'tooltip': 'The content of the response is a CSS'},
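The `Icon` TypedDict these literals are checked against is only partially visible in the hunk context (`tooltip: str`); the `icon: str` field is implied by the literals. A minimal sketch of the pattern:

    from typing import TypedDict


    class Icon(TypedDict):
        icon: str
        tooltip: str


    js: Icon = {'icon': 'javascript.png', 'tooltip': 'The content of the response is a javascript'}
    # A misspelled key such as 'ico', or a missing 'tooltip', is now a mypy error.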
@@ -208,7 +211,7 @@ def get_icon(icon_id: str) -> Optional[Icon]:
app.jinja_env.globals.update(get_icon=get_icon)

-def get_tz_info() -> Tuple[Optional[str], str, Set[str]]:
+def get_tz_info() -> tuple[str | None, str, set[str]]:
    now = datetime.now().astimezone()
    local_TZ = now.tzname()
    local_UTC_offset = f'UTC{now.strftime("%z")}'
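The `str | None` in the first slot of the tuple is honest rather than cosmetic: `datetime.tzname()` is typed as returning an optional string, even though after `.astimezone()` a name is available in practice:

    from datetime import datetime

    now = datetime.now().astimezone()
    print(now.tzname(), f'UTC{now.strftime("%z")}')  # e.g. CET UTC+0100 (machine-dependent)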
@@ -221,7 +224,7 @@ app.jinja_env.globals.update(tz_info=get_tz_info)
# ##### Generic/configuration methods #####

@app.after_request
-def after_request(response):
+def after_request(response: Response) -> Response:
    if use_own_ua:
        # We keep a list user agents in order to build a list to use in the capture
        # interface: this is the easiest way to have something up to date.
@@ -241,9 +244,9 @@ def after_request(response):
    return response

-def file_response(func):
+def file_response(func):  # type: ignore[no-untyped-def]
    @functools.wraps(func)
-    def wrapper(*args, **kwargs):
+    def wrapper(*args, **kwargs) -> Response:  # type: ignore[no-untyped-def]
        try:
            return func(*args, **kwargs)
        except NoValidHarFile:
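`file_response` keeps its signature untyped and silences mypy with `no-untyped-def`. For comparison, a fully typed variant (not what the commit does; the except branch here is illustrative) could use ParamSpec, available from typing_extensions on Python 3.8:

    import functools
    from typing import Callable

    from flask import Response
    from typing_extensions import ParamSpec

    from lookyloo.exceptions import NoValidHarFile

    P = ParamSpec('P')


    def file_response(func: Callable[P, Response]) -> Callable[P, Response]:
        @functools.wraps(func)
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> Response:
            try:
                return func(*args, **kwargs)
            except NoValidHarFile:
                # sketch of the error handling, not the actual body
                return Response('Unable to process the capture.', mimetype='text/text')
        return wrapper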
@@ -259,23 +262,23 @@ def file_response(func):
# ##### Hostnode level methods #####

@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/hashes', methods=['GET'])
-@file_response
-def hashes_hostnode(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def hashes_hostnode(tree_uuid: str, node_uuid: str) -> Response:
    hashes = lookyloo.get_hashes(tree_uuid, hostnode_uuid=node_uuid)
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='test/plain', as_attachment=True, download_name=f'hashes.{node_uuid}.txt')

@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/text', methods=['GET'])
-@file_response
-def urls_hostnode(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urls_hostnode(tree_uuid: str, node_uuid: str) -> Response:
    hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
    return send_file(BytesIO('\n'.join(url.name for url in hostnode.urls).encode()),
                     mimetype='test/plain', as_attachment=True, download_name=f'urls.{node_uuid}.txt')

@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>', methods=['GET'])
-def hostnode_popup(tree_uuid: str, node_uuid: str):
+def hostnode_popup(tree_uuid: str, node_uuid: str) -> str | WerkzeugResponse | Response:
    try:
        hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)
    except IndexError:
@@ -294,7 +297,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
# ##### Tree level Methods #####

@app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
-def trigger_modules(tree_uuid: str):
+def trigger_modules(tree_uuid: str) -> WerkzeugResponse | str | Response:
    force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
    auto_trigger = True if (request.args.get('auto_trigger') and request.args.get('auto_trigger') == 'True') else False
    lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
@@ -302,7 +305,7 @@ def trigger_modules(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/historical_lookups', methods=['GET'])
-def historical_lookups(tree_uuid: str):
+def historical_lookups(tree_uuid: str) -> str | WerkzeugResponse | Response:
    force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
    data = lookyloo.get_historical_lookups(tree_uuid, force)
    return render_template('historical_lookups.html', tree_uuid=tree_uuid,
@@ -312,7 +315,7 @@ def historical_lookups(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''})
@app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
-def categories_capture(tree_uuid: str, query: str):
+def categories_capture(tree_uuid: str, query: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    current_categories = lookyloo.categories_capture(tree_uuid)
@@ -330,7 +333,7 @@ def categories_capture(tree_uuid: str, query: str):
@app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
-def uncategorize_capture(tree_uuid: str, category: str):
+def uncategorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.uncategorize_capture(tree_uuid, category)
@@ -339,7 +342,7 @@ def uncategorize_capture(tree_uuid: str, category: str):
@app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
-def categorize_capture(tree_uuid: str, category: str):
+def categorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.categorize_capture(tree_uuid, category)
@@ -347,19 +350,19 @@ def categorize_capture(tree_uuid: str, category: str):
@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
-def stats(tree_uuid: str):
+def stats(tree_uuid: str) -> str:
    stats = lookyloo.get_statistics(tree_uuid)
    return render_template('statistics.html', uuid=tree_uuid, stats=stats)

@app.route('/tree/<string:tree_uuid>/misp_lookup', methods=['GET'])
-@flask_login.login_required
-def web_misp_lookup_view(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def web_misp_lookup_view(tree_uuid: str) -> str | WerkzeugResponse | Response:
    if not lookyloo.misps.available:
        flash('There are no MISP instances available.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    misps_occurrences = {}
-    for instance_name in lookyloo.misps:
+    for instance_name in lookyloo.misps.keys():
        if occurrences := lookyloo.get_misp_occurrences(tree_uuid, instance_name=instance_name):
            misps_occurrences[instance_name] = occurrences
    return render_template('misp_lookup.html', uuid=tree_uuid,
@@ -368,8 +371,8 @@ def web_misp_lookup_view(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/misp_push', methods=['GET', 'POST'])
-@flask_login.login_required
-def web_misp_push_view(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def web_misp_push_view(tree_uuid: str) -> str | WerkzeugResponse | Response | None:
    if not lookyloo.misps.available:
        flash('There are no MISP instances available.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
@@ -413,7 +416,7 @@ def web_misp_push_view(tree_uuid: str):
        # Submit the event
        tags = request.form.getlist('tags')
        error = False
-        events: List[MISPEvent] = []
+        events: list[MISPEvent] = []
        with_parents = request.form.get('with_parents')
        if with_parents:
            exports = lookyloo.misp_export(tree_uuid, True)
@@ -447,15 +450,16 @@ def web_misp_push_view(tree_uuid: str):
            for e in new_events:
                flash(f'MISP event {e.id} created on {misp.client.root_url}', 'success')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
+    return None

@app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
-def modules(tree_uuid: str):
+def modules(tree_uuid: str) -> str | WerkzeugResponse | Response:
    modules_responses = lookyloo.get_modules_responses(tree_uuid)
    if not modules_responses:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
-    vt_short_result: Dict[str, Dict[str, Any]] = {}
+    vt_short_result: dict[str, dict[str, Any]] = {}
    if 'vt' in modules_responses:
        # VirusTotal cleanup
        vt = modules_responses.pop('vt')
@@ -471,7 +475,7 @@ def modules(tree_uuid: str):
            if result['category'] == 'malicious':
                vt_short_result[url]['malicious'].append((vendor, result['result']))

-    pi_short_result: Dict[str, str] = {}
+    pi_short_result: dict[str, str] = {}
    if 'pi' in modules_responses:
        pi = modules_responses.pop('pi')
        for url, full_report in pi.items():
@@ -479,7 +483,7 @@ def modules(tree_uuid: str):
                continue
            pi_short_result[url] = full_report['results'][0]['tag_label']

-    phishtank_short_result: Dict[str, Dict] = {'urls': {}, 'ips_hits': {}}
+    phishtank_short_result: dict[str, dict[str, Any]] = {'urls': {}, 'ips_hits': {}}
    if 'phishtank' in modules_responses:
        pt = modules_responses.pop('phishtank')
        for url, full_report in pt['urls'].items():
@@ -496,7 +500,7 @@ def modules(tree_uuid: str):
                                full_report['url'],
                                full_report['phish_detail_url']))

-    urlhaus_short_result: Dict[str, List] = {'urls': []}
+    urlhaus_short_result: dict[str, list[Any]] = {'urls': []}
    if 'urlhaus' in modules_responses:
        # TODO: make a short result
        uh = modules_responses.pop('urlhaus')
@@ -504,7 +508,7 @@ def modules(tree_uuid: str):
        if results:
            urlhaus_short_result['urls'].append(results)

-    urlscan_to_display: Dict = {}
+    urlscan_to_display: dict[str, Any] = {}
    if 'urlscan' in modules_responses and modules_responses.get('urlscan'):
        urlscan = modules_responses.pop('urlscan')
        if 'error' in urlscan['submission']:
@@ -534,8 +538,8 @@ def modules(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
-@file_response
-def redirects(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def redirects(tree_uuid: str) -> Response:
    cache = lookyloo.capture_cache(tree_uuid)
    if not cache or not hasattr(cache, 'redirects'):
        return Response('Not available.', mimetype='text/text')
@@ -550,8 +554,8 @@ def redirects(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
-@file_response
-def image(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def image(tree_uuid: str) -> Response:
    max_width = request.args.get('width')
    if max_width and max_width.isdigit():
        to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, width=int(max_width))
@@ -562,12 +566,11 @@ def image(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/data', methods=['GET'])
-@file_response
-def data(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def data(tree_uuid: str) -> Response:
    filename, data = lookyloo.get_data(tree_uuid)
    if len(filename) == 0:
-        # TODO: return something saying it is not a valid request
-        return
+        return Response('No files.', mimetype='text/text')

    if filetype.guess_mime(data.getvalue()) is None:
        mime = 'application/octet-stream'
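The `data` route is the one spot in this stretch where the annotation pass forced a behaviour change rather than a pure annotation: a Flask view that returns None fails at runtime ("the view function did not return a valid response"), and once the function is annotated `-> Response`, mypy flags the bare return as well. A simplified sketch of the old body:

    from flask import Response


    def data(tree_uuid: str) -> Response:  # sketch, not the actual view
        filename = ''
        if len(filename) == 0:
            return  # mypy: error: Return value expected  [return-value]
        ...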
@@ -579,46 +582,46 @@ def data(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/thumbnail/', defaults={'width': 64}, methods=['GET'])
@app.route('/tree/<string:tree_uuid>/thumbnail/<int:width>', methods=['GET'])
-@file_response
-def thumbnail(tree_uuid: str, width: int):
+@file_response  # type: ignore[misc]
+def thumbnail(tree_uuid: str, width: int) -> Response:
    to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=False, width=width)
    return send_file(to_return, mimetype='image/png')

@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
-@file_response
-def html(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def html(tree_uuid: str) -> Response:
    to_return = lookyloo.get_html(tree_uuid)
    return send_file(to_return, mimetype='text/html',
                     as_attachment=True, download_name='page.html')

@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
-@file_response
-def cookies(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def cookies(tree_uuid: str) -> Response:
    to_return = lookyloo.get_cookies(tree_uuid)
    return send_file(to_return, mimetype='application/json',
                     as_attachment=True, download_name='cookies.json')

@app.route('/tree/<string:tree_uuid>/hashes', methods=['GET'])
-@file_response
-def hashes_tree(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def hashes_tree(tree_uuid: str) -> Response:
    hashes = lookyloo.get_hashes(tree_uuid)
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='test/plain', as_attachment=True, download_name='hashes.txt')

@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
-@file_response
-def export(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def export(tree_uuid: str) -> Response:
    to_return = lookyloo.get_capture(tree_uuid)
    return send_file(to_return, mimetype='application/zip',
                     as_attachment=True, download_name='capture.zip')

@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
-def urls_rendered_page(tree_uuid: str):
+def urls_rendered_page(tree_uuid: str) -> WerkzeugResponse | str | Response:
    try:
        urls = lookyloo.get_urls_rendered_page(tree_uuid)
        return render_template('urls_rendered.html', base_tree_uuid=tree_uuid, urls=urls)
@@ -628,7 +631,7 @@ def urls_rendered_page(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/hashlookup', methods=['GET'])
-def hashlookup(tree_uuid: str):
+def hashlookup(tree_uuid: str) -> str | WerkzeugResponse | Response:
    merged, total_ressources = lookyloo.merge_hashlookup_tree(tree_uuid)
    # We only want unique URLs for the template
    for sha1, entries in merged.items():
@@ -637,7 +640,7 @@ def hashlookup(tree_uuid: str):
@app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
-def bulk_captures(base_tree_uuid: str):
+def bulk_captures(base_tree_uuid: str) -> WerkzeugResponse | str | Response:
    if flask_login.current_user.is_authenticated:
        user = flask_login.current_user.get_id()
    else:
@@ -666,16 +669,16 @@ def bulk_captures(base_tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
-@flask_login.login_required
-def hide_capture(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def hide_capture(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.hide_capture(tree_uuid)
    flash('Successfully hidden.', 'success')
    return redirect(url_for('tree', tree_uuid=tree_uuid))

@app.route('/tree/<string:tree_uuid>/rebuild')
-@flask_login.login_required
-def rebuild_tree(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def rebuild_tree(tree_uuid: str) -> WerkzeugResponse:
    try:
        lookyloo.remove_pickle(tree_uuid)
        flash('Successfully rebuilt.', 'success')
@@ -685,13 +688,13 @@ def rebuild_tree(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/cache', methods=['GET'])
-def cache_tree(tree_uuid: str):
+def cache_tree(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.capture_cache(tree_uuid)
    return redirect(url_for('index'))

@app.route('/tree/<string:tree_uuid>/monitor', methods=['POST', 'GET'])
-def monitor(tree_uuid: str):
+def monitor(tree_uuid: str) -> WerkzeugResponse:
    if not lookyloo.monitoring_enabled:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    if request.form.get('name') or not request.form.get('confirm'):
@@ -702,7 +705,7 @@ def monitor(tree_uuid: str):
    collection: str = request.form['collection'] if request.form.get('collection') else ''
    notification_email: str = request.form['notification'] if request.form.get('notification') else ''
    frequency: str = request.form['frequency'] if request.form.get('frequency') else 'daily'
-    expire_at: Optional[float] = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
+    expire_at: float | None = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
    cache = lookyloo.capture_cache(tree_uuid)
    if cache:
        monitoring_uuid = lookyloo.monitoring.monitor({'url': cache.url, 'user_agent': cache.user_agent, 'listing': False},
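A small detail in the monitor route: the expiry arrives as an ISO 8601 form field and is stored as a Unix timestamp, which is why the annotation is `float | None` rather than `datetime | None`:

    from datetime import datetime

    expire_at = datetime.fromisoformat('2024-02-01T12:00:00').timestamp()
    print(expire_at)  # e.g. 1706785200.0 (the exact value depends on the local timezone)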
@@ -719,7 +722,7 @@ def monitor(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/send_mail', methods=['POST', 'GET'])
-def send_mail(tree_uuid: str):
+def send_mail(tree_uuid: str) -> WerkzeugResponse:
    if not enable_mail_notification:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    if request.form.get('name') or not request.form.get('confirm'):
@@ -739,7 +742,7 @@ def send_mail(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
-def tree(tree_uuid: str, node_uuid: Optional[str]=None):
+def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | WerkzeugResponse:
    if tree_uuid == 'False':
        flash("Unable to process your request.", 'warning')
        return redirect(url_for('index'))
@@ -820,10 +823,10 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
@app.route('/tree/<string:tree_uuid>/mark_as_legitimate', methods=['POST'])
-@flask_login.login_required
-def mark_as_legitimate(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def mark_as_legitimate(tree_uuid: str) -> Response:
    if request.data:
-        legitimate_entries: Dict = request.get_json(force=True)
+        legitimate_entries: dict[str, Any] = request.get_json(force=True)
        lookyloo.add_to_legitimate(tree_uuid, **legitimate_entries)
    else:
        lookyloo.add_to_legitimate(tree_uuid)
@@ -831,13 +834,13 @@ def mark_as_legitimate(tree_uuid: str):
@app.route('/tree/<string:tree_uuid>/body_hashes', methods=['GET'])
-def tree_body_hashes(tree_uuid: str):
+def tree_body_hashes(tree_uuid: str) -> str:
    body_hashes = lookyloo.get_all_body_hashes(tree_uuid)
    return render_template('tree_body_hashes.html', tree_uuid=tree_uuid, body_hashes=body_hashes)

@app.route('/tree/<string:tree_uuid>/pandora', methods=['GET', 'POST'])
-def pandora_submit(tree_uuid: str):
+def pandora_submit(tree_uuid: str) -> dict[str, Any] | Response:
    node_uuid = None
    if request.method == 'POST':
        input_json = request.get_json(force=True)
@@ -860,14 +863,14 @@ def pandora_submit(tree_uuid: str):
# ##### helpers #####

-def index_generic(show_hidden: bool=False, show_error: bool=True, category: Optional[str]=None):
+def index_generic(show_hidden: bool=False, show_error: bool=True, category: str | None=None) -> str:
    """This method is used to generate the index page. It is possible that some of the captures
    do not have their pickle yet.
    We must assume that calling cached.tree will fail, and handle it gracefully.
    """
    titles = []
-    cut_time: Optional[datetime] = None
+    cut_time: datetime | None = None
    if time_delta_on_index:
        # We want to filter the captures on the index
        cut_time = (datetime.now() - timedelta(**time_delta_on_index))
@@ -899,7 +902,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
                           version=pkg_version)

-def get_index_params(request):
+def get_index_params(request: Request) -> tuple[bool, str]:
    show_error: bool = True
    category: str = ''
    if hide_captures_with_error:
@@ -913,7 +916,7 @@ def get_index_params(request):
# ##### Index level methods #####

@app.route('/', methods=['GET'])
-def index():
+def index() -> str:
    if request.method == 'HEAD':
        # Just returns ack if the webserver is running
        return 'Ack'
@@ -922,28 +925,28 @@ def index():
@app.route('/hidden', methods=['GET'])
-@flask_login.login_required
-def index_hidden():
+@flask_login.login_required  # type: ignore[misc]
+def index_hidden() -> str:
    show_error, category = get_index_params(request)
    return index_generic(show_hidden=True, show_error=show_error, category=category)

@app.route('/cookies', methods=['GET'])
-def cookies_lookup():
+def cookies_lookup() -> str:
    cookies_names = [(name, freq, lookyloo.indexing.cookies_names_number_domains(name))
                     for name, freq in lookyloo.indexing.cookies_names]
    return render_template('cookies.html', cookies_names=cookies_names)

@app.route('/hhhashes', methods=['GET'])
-def hhhashes_lookup():
+def hhhashes_lookup() -> str:
    hhhashes = [(hhh, freq, lookyloo.indexing.http_headers_hashes_number_captures(hhh))
                for hhh, freq in lookyloo.indexing.http_headers_hashes]
    return render_template('hhhashes.html', hhhashes=hhhashes)

@app.route('/ressources', methods=['GET'])
-def ressources():
+def ressources() -> str:
    ressources = []
    for h, freq in lookyloo.indexing.ressources:
        domain_freq = lookyloo.indexing.ressources_number_domains(h)
@@ -961,26 +964,26 @@ def ressources():
@app.route('/categories', methods=['GET'])
-def categories():
+def categories() -> str:
    return render_template('categories.html', categories=lookyloo.indexing.categories)

@app.route('/rebuild_all')
-@flask_login.login_required
-def rebuild_all():
+@flask_login.login_required  # type: ignore[misc]
+def rebuild_all() -> WerkzeugResponse:
    lookyloo.rebuild_all()
    return redirect(url_for('index'))

@app.route('/rebuild_cache')
-@flask_login.login_required
-def rebuild_cache():
+@flask_login.login_required  # type: ignore[misc]
+def rebuild_cache() -> WerkzeugResponse:
    lookyloo.rebuild_cache()
    return redirect(url_for('index'))

@app.route('/search', methods=['GET', 'POST'])
-def search():
+def search() -> str | Response | WerkzeugResponse:
    if request.form.get('url'):
        quoted_url: str = quote_plus(request.form['url'])
        return redirect(url_for('url_details', url=quoted_url))
@@ -993,7 +996,7 @@ def search():
    return render_template('search.html')

-def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None):
+def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=None) -> str:
    return render_template('capture.html', user_agents=user_agents.user_agents,
                           default=user_agents.default,
                           personal_ua=user_ua,
@@ -1004,7 +1007,7 @@ def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[s
@app.route('/recapture/<string:tree_uuid>', methods=['GET'])
-def recapture(tree_uuid: str):
+def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse:
    cache = lookyloo.capture_cache(tree_uuid)
    if cache and hasattr(cache, 'url'):
        return _prepare_capture_template(user_ua=request.headers.get('User-Agent'),
@@ -1016,15 +1019,15 @@ def recapture(tree_uuid: str):
# ################## Submit existing capture ##################

@app.route('/submit_capture', methods=['GET', 'POST'])
-def submit_capture():
+def submit_capture() -> str | Response | WerkzeugResponse:
    if request.method == 'POST':
        listing = True if request.form.get('listing') else False
        uuid = str(uuid4())  # NOTE: new UUID, because we do not want duplicates
-        har: Optional[Dict[str, Any]] = None
-        html: Optional[str] = None
-        last_redirected_url: Optional[str] = None
-        screenshot: Optional[bytes] = None
+        har: dict[str, Any] | None = None
+        html: str | None = None
+        last_redirected_url: str | None = None
+        screenshot: bytes | None = None
        if 'har_file' in request.files and request.files['har_file']:
            har = json.loads(request.files['har_file'].stream.read())
            last_redirected_url = request.form.get('landing_page')
@@ -1038,7 +1041,7 @@ def submit_capture():
            return redirect(url_for('tree', tree_uuid=uuid))
        elif 'full_capture' in request.files and request.files['full_capture']:
            # it *only* accepts a lookyloo export.
-            cookies: Optional[List[Dict[str, str]]] = None
+            cookies: list[dict[str, str]] | None = None
            has_error = False
            with ZipFile(BytesIO(request.files['full_capture'].stream.read()), 'r') as lookyloo_capture:
                potential_favicons = set()
@@ -1084,7 +1087,7 @@ def submit_capture():
# #############################################################

@app.route('/capture', methods=['GET', 'POST'])
-def capture_web():
+def capture_web() -> str | Response | WerkzeugResponse:
    if flask_login.current_user.is_authenticated:
        user = flask_login.current_user.get_id()
    else:
@@ -1143,7 +1146,7 @@ def capture_web():
            parsed_proxy = urlparse(request.form['proxy'])
            if parsed_proxy.scheme and parsed_proxy.hostname and parsed_proxy.port:
                if parsed_proxy.scheme in ['http', 'https', 'socks5']:
-                    if (parsed_proxy.username and parsed_proxy.password) != (not parsed_proxy.username and not parsed_proxy.password):
+                    if (parsed_proxy.username and parsed_proxy.password) or (not parsed_proxy.username and not parsed_proxy.password):
                        capture_query['proxy'] = request.form['proxy']
                    else:
                        flash('You need to enter a username AND a password for your proxy.', 'error')
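The proxy check is the other genuine logic fix in this file: the old `!=` compared a string-or-falsy value against a boolean, while the new condition reads as intended, accepting the proxy URL when credentials are either fully present or fully absent. A quick sketch of the behaviour:

    from urllib.parse import urlparse

    def credentials_ok(proxy_url: str) -> bool:  # hypothetical helper, for illustration only
        p = urlparse(proxy_url)
        return bool((p.username and p.password) or (not p.username and not p.password))

    assert credentials_ok('socks5://user:pass@127.0.0.1:1080')   # both set: accepted
    assert credentials_ok('socks5://127.0.0.1:1080')             # neither set: accepted
    assert not credentials_ok('socks5://user@127.0.0.1:1080')    # username only: rejected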
@ -1192,47 +1195,47 @@ def capture_web():
@app.route('/cookies/<string:cookie_name>', methods=['GET']) @app.route('/cookies/<string:cookie_name>', methods=['GET'])
def cookies_name_detail(cookie_name: str): def cookies_name_detail(cookie_name: str) -> str:
captures, domains = lookyloo.get_cookie_name_investigator(cookie_name.strip()) captures, domains = lookyloo.get_cookie_name_investigator(cookie_name.strip())
return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures) return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures)
@app.route('/hhhdetails/<string:hhh>', methods=['GET']) @app.route('/hhhdetails/<string:hhh>', methods=['GET'])
def hhh_detail(hhh: str): def hhh_detail(hhh: str) -> str:
captures, headers = lookyloo.get_hhh_investigator(hhh.strip()) captures, headers = lookyloo.get_hhh_investigator(hhh.strip())
return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers) return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers)
@app.route('/body_hashes/<string:body_hash>', methods=['GET']) @app.route('/body_hashes/<string:body_hash>', methods=['GET'])
def body_hash_details(body_hash: str): def body_hash_details(body_hash: str) -> str:
from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip()) captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip())
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup) return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup)
@app.route('/urls/<string:url>', methods=['GET']) @app.route('/urls/<string:url>', methods=['GET'])
def url_details(url: str): def url_details(url: str) -> str:
url = unquote_plus(url).strip() url = unquote_plus(url).strip()
hits = lookyloo.get_url_occurrences(url, limit=50) hits = lookyloo.get_url_occurrences(url, limit=50)
return render_template('url.html', url=url, hits=hits) return render_template('url.html', url=url, hits=hits)
@app.route('/hostnames/<string:hostname>', methods=['GET']) @app.route('/hostnames/<string:hostname>', methods=['GET'])
def hostname_details(hostname: str): def hostname_details(hostname: str) -> str:
hits = lookyloo.get_hostname_occurrences(hostname.strip(), with_urls_occurrences=True, limit=50) hits = lookyloo.get_hostname_occurrences(hostname.strip(), with_urls_occurrences=True, limit=50)
return render_template('hostname.html', hostname=hostname, hits=hits) return render_template('hostname.html', hostname=hostname, hits=hits)
@app.route('/stats', methods=['GET']) @app.route('/stats', methods=['GET'])
def statsfull(): def statsfull() -> str:
stats = lookyloo.get_stats() stats = lookyloo.get_stats()
return render_template('stats.html', stats=stats) return render_template('stats.html', stats=stats)
@app.route('/whois/<string:query>', methods=['GET']) @app.route('/whois/<string:query>', methods=['GET'])
@app.route('/whois/<string:query>/<int:email_only>', methods=['GET']) @app.route('/whois/<string:query>/<int:email_only>', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def whois(query: str, email_only: int=0): def whois(query: str, email_only: int=0) -> Response:
to_return = lookyloo.uwhois.whois(query, bool(email_only)) to_return = lookyloo.uwhois.whois(query, bool(email_only))
if isinstance(to_return, str): if isinstance(to_return, str):
return send_file(BytesIO(to_return.encode()), return send_file(BytesIO(to_return.encode()),
@ -1243,35 +1246,35 @@ def whois(query: str, email_only: int=0):
# ##### Methods related to a specific URLNode ##### # ##### Methods related to a specific URLNode #####
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def urlnode_request_cookies(tree_uuid: str, node_uuid: str): def urlnode_request_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid) urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.request_cookie: if not urlnode.request_cookie:
return return None
return send_file(BytesIO(json.dumps(urlnode.request_cookie, indent=2).encode()), return send_file(BytesIO(json.dumps(urlnode.request_cookie, indent=2).encode()),
mimetype='text/plain', as_attachment=True, download_name='request_cookies.txt') mimetype='text/plain', as_attachment=True, download_name='request_cookies.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def urlnode_response_cookies(tree_uuid: str, node_uuid: str): def urlnode_response_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid) urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.response_cookie: if not urlnode.response_cookie:
return return None
return send_file(BytesIO(json.dumps(urlnode.response_cookie, indent=2).encode()), return send_file(BytesIO(json.dumps(urlnode.response_cookie, indent=2).encode()),
mimetype='text/plain', as_attachment=True, download_name='response_cookies.txt') mimetype='text/plain', as_attachment=True, download_name='response_cookies.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str): def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
# Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at somepoint, # Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at somepoint,
# we have multiple page rendered on one tree, it will be a problem. # we have multiple page rendered on one tree, it will be a problem.
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid) urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not hasattr(urlnode, 'rendered_html') or not urlnode.rendered_html: if not hasattr(urlnode, 'rendered_html') or not urlnode.rendered_html:
return return None
ct = lookyloo.get_crawled_tree(tree_uuid) ct = lookyloo.get_crawled_tree(tree_uuid)
not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page) not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
@ -1283,22 +1286,22 @@ def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/rendered_content', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/rendered_content', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def urlnode_rendered_content(tree_uuid: str, node_uuid: str): def urlnode_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid) urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.rendered_html: if not urlnode.rendered_html:
return return None
return send_file(BytesIO(urlnode.rendered_html.getvalue()), mimetype='text/plain', return send_file(BytesIO(urlnode.rendered_html.getvalue()), mimetype='text/plain',
as_attachment=True, download_name='rendered_content.txt') as_attachment=True, download_name='rendered_content.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def urlnode_post_request(tree_uuid: str, node_uuid: str): def urlnode_post_request(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid) urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.posted_data: if not urlnode.posted_data:
return return None
posted: Union[str, bytes] posted: str | bytes
if isinstance(urlnode.posted_data, (dict, list)): if isinstance(urlnode.posted_data, (dict, list)):
# JSON blob, pretty print. # JSON blob, pretty print.
posted = json.dumps(urlnode.posted_data, indent=2) posted = json.dumps(urlnode.posted_data, indent=2)
@ -1322,8 +1325,8 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
@file_response @file_response # type: ignore[misc]
def get_ressource(tree_uuid: str, node_uuid: str): def get_ressource(tree_uuid: str, node_uuid: str) -> Response:
if request.method == 'POST': if request.method == 'POST':
h_request = request.form.get('ressource_hash') h_request = request.form.get('ressource_hash')
else: else:
@ -1343,8 +1346,8 @@ def get_ressource(tree_uuid: str, node_uuid: str):
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview/<string:h_ressource>', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview/<string:h_ressource>', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: Optional[str]=None): def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: str | None=None) -> Response:
ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_ressource) ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_ressource)
if not ressource: if not ressource:
return Response('No preview available.', mimetype='text/text') return Response('No preview available.', mimetype='text/text')
@ -1356,16 +1359,16 @@ def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: Optional[
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/hashes', methods=['GET']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/hashes', methods=['GET'])
@file_response @file_response # type: ignore[misc]
def hashes_urlnode(tree_uuid: str, node_uuid: str): def hashes_urlnode(tree_uuid: str, node_uuid: str) -> Response:
hashes = lookyloo.get_hashes(tree_uuid, urlnode_uuid=node_uuid) hashes = lookyloo.get_hashes(tree_uuid, urlnode_uuid=node_uuid)
return send_file(BytesIO('\n'.join(hashes).encode()), return send_file(BytesIO('\n'.join(hashes).encode()),
mimetype='test/plain', as_attachment=True, download_name='hashes.txt') mimetype='test/plain', as_attachment=True, download_name='hashes.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/add_context', methods=['POST']) @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/add_context', methods=['POST'])
@flask_login.login_required @flask_login.login_required # type: ignore[misc]
def add_context(tree_uuid: str, node_uuid: str): def add_context(tree_uuid: str, node_uuid: str) -> WerkzeugResponse | None:
if not enable_context_by_users: if not enable_context_by_users:
return redirect(url_for('ressources')) return redirect(url_for('ressources'))
@ -1375,7 +1378,7 @@ def add_context(tree_uuid: str, node_uuid: str):
callback_str: str = context_data['callback_str'] callback_str: str = context_data['callback_str']
legitimate: bool = True if context_data.get('legitimate') else False legitimate: bool = True if context_data.get('legitimate') else False
malicious: bool = True if context_data.get('malicious') else False malicious: bool = True if context_data.get('malicious') else False
details: Dict[str, Dict] = {'malicious': {}, 'legitimate': {}} details: dict[str, dict[str, Any]] = {'malicious': {}, 'legitimate': {}}
if malicious: if malicious:
malicious_details = {} malicious_details = {}
if context_data.get('malicious_type'): if context_data.get('malicious_type'):
@@ -1396,6 +1399,7 @@ def add_context(tree_uuid: str, node_uuid: str):
         return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
     elif callback_str == 'ressources':
         return redirect(url_for('ressources'))
+    return None
 # Query API
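
The added `return None` above is worth a note: once the view is annotated `-> WerkzeugResponse | None`, spelling out the fall-through return keeps every exit path explicit. A minimal sketch of the pattern (standalone, not the application's code):

    def dispatch(callback_str: str) -> str | None:
        if callback_str == 'hostnode_popup':
            return 'popup'
        elif callback_str == 'ressources':
            return 'ressources'
        return None  # explicit fall-through instead of the implicit None
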
View File
@@ -1,20 +1,22 @@
 #!/usr/bin/env python3
+from __future__ import annotations
 import base64
 import hashlib
 import json
 from io import BytesIO
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Tuple, List
 from zipfile import ZipFile
 import flask_login  # type: ignore
-from flask import request, send_file
+from flask import request, send_file, Response
 from flask_restx import Namespace, Resource, abort, fields  # type: ignore
 from werkzeug.security import check_password_hash
 from lacuscore import CaptureStatus as CaptureStatusCore
-from pylacus import CaptureStatus as CaptureStatusPy
+from pylacus import CaptureStatus as CaptureStatusPy  # type: ignore[attr-defined]
 from lookyloo.comparator import Comparator
 from lookyloo.exceptions import MissingUUID, NoValidHarFile
 from lookyloo.lookyloo import CaptureSettings, Lookyloo
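
A quick aside on the `from __future__ import annotations` line added above: it is what makes `str | None` and `dict[str, Any]` legal in signatures while the project still targets Python 3.8, since annotations are then stored as strings (PEP 563) and never evaluated at runtime. A minimal illustration (standalone sketch, not from the diff):

    from __future__ import annotations

    def lookup(key: str, default: str | None = None) -> dict[str, str] | None:
        return None

    # Stored as the plain string 'str | None'; evaluating `str | None` at
    # runtime would raise TypeError on Python 3.8/3.9.
    print(lookup.__annotations__['default'])
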
@@ -27,7 +29,7 @@ lookyloo: Lookyloo = get_lookyloo_instance()
 comparator: Comparator = Comparator()
-def api_auth_check(method):
+def api_auth_check(method):  # type: ignore
     if flask_login.current_user.is_authenticated or load_user_from_request(request):
         return method
     abort(403, 'Authentication required.')
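
The `# type: ignore` / `# type: ignore[misc]` pragmas that recur through this file all target the same thing: flask_login and flask_restx ship without complete type hints, so under mypy's `--disallow-untyped-decorators` every decorated view triggers a `misc` error. A hypothetical reduction of the problem (`untyped_decorator` is made up for illustration):

    from typing import Any

    def untyped_decorator(func):  # unannotated, like the flask_restx decorators
        return func

    @untyped_decorator  # type: ignore[misc]
    def typed_view() -> dict[str, Any]:
        return {'ok': True}
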
@@ -39,30 +41,30 @@ token_request_fields = api.model('AuthTokenFields', {
 })
-@api.errorhandler(NoValidHarFile)
-def handle_no_HAR_file_exception(error):
+@api.errorhandler(NoValidHarFile)  # type: ignore[misc]
+def handle_no_HAR_file_exception(error: Any) -> tuple[dict[str, str], int]:
     '''The capture has no HAR file, it failed for some reason.'''
     return {'message': str(error)}, 400
 @api.route('/json/get_token')
 @api.doc(description='Get the API token required for authenticated calls')
-class AuthToken(Resource):
+class AuthToken(Resource):  # type: ignore[misc]
     users_table = build_users_table()
-    @api.param('username', 'Your username')
-    @api.param('password', 'Your password')
-    def get(self):
-        username: Optional[str] = request.args['username'] if request.args.get('username') else None
-        password: Optional[str] = request.args['password'] if request.args.get('password') else None
+    @api.param('username', 'Your username')  # type: ignore[misc]
+    @api.param('password', 'Your password')  # type: ignore[misc]
+    def get(self) -> dict[str, str] | tuple[dict[str, str], int]:
+        username: str | None = request.args['username'] if request.args.get('username') else None
+        password: str | None = request.args['password'] if request.args.get('password') else None
         if username and password and username in self.users_table and check_password_hash(self.users_table[username]['password'], password):
             return {'authkey': self.users_table[username]['authkey']}
         return {'error': 'User/Password invalid.'}, 401
-    @api.doc(body=token_request_fields)
-    def post(self):
-        auth: Dict = request.get_json(force=True)
+    @api.doc(body=token_request_fields)  # type: ignore[misc]
+    def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
+        auth: dict[str, Any] = request.get_json(force=True)
         if 'username' in auth and 'password' in auth:  # Expected keys in json
             if (auth['username'] in self.users_table
                     and check_password_hash(self.users_table[auth['username']]['password'], auth['password'])):
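
For reference, a client-side sketch of the endpoint above (host, port and credentials are placeholders; adjust to your instance):

    import requests

    r = requests.post('http://127.0.0.1:5100/json/get_token',
                      json={'username': 'admin', 'password': 'changeme'})
    token = r.json().get('authkey')
    # The key is then passed in the Authorization header of authenticated calls.
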
@@ -73,13 +75,13 @@ class AuthToken(Resource):
 @api.route('/json/<string:capture_uuid>/status')
 @api.doc(description='Get the status of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureStatusQuery(Resource):
-    @api.param('with_error', 'Add the error message of the capture (if there is one)')
-    def get(self, capture_uuid: str):
+class CaptureStatusQuery(Resource):  # type: ignore[misc]
+    @api.param('with_error', 'Add the error message of the capture (if there is one)')  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         with_error: bool = True if request.args.get('with_error') else False
         status_code = lookyloo.get_capture_status(capture_uuid)
-        to_return: Dict[str, Any] = {'status_code': status_code}
+        to_return: dict[str, Any] = {'status_code': status_code}
         if status_code in [CaptureStatusCore.DONE, CaptureStatusPy.DONE] and with_error:
             cache = lookyloo.capture_cache(capture_uuid)
             if cache and cache.error:
@@ -90,40 +92,40 @@ class CaptureStatusQuery(Resource):
 @api.route('/json/<string:capture_uuid>/hostnames')
 @api.doc(description='Get all the hostnames of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureHostnames(Resource):
-    def get(self, capture_uuid: str):
+class CaptureHostnames(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
-        to_return: Dict[str, Any] = {'response': {'hostnames': list(lookyloo.get_hostnames(capture_uuid))}}
+        to_return: dict[str, Any] = {'response': {'hostnames': list(lookyloo.get_hostnames(capture_uuid))}}
         return to_return
 @api.route('/json/<string:capture_uuid>/urls')
 @api.doc(description='Get all the URLs of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureURLs(Resource):
-    def get(self, capture_uuid: str):
+class CaptureURLs(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
-        to_return: Dict[str, Any] = {'response': {'urls': list(lookyloo.get_urls(capture_uuid))}}
+        to_return: dict[str, Any] = {'response': {'urls': list(lookyloo.get_urls(capture_uuid))}}
         return to_return
 @api.route('/json/<string:capture_uuid>/hashes')
 @api.doc(description='Get all the hashes of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureHashes(Resource):
+class CaptureHashes(Resource):  # type: ignore[misc]
     # Note: shake algos require a length for the digest, discarding them.
     supported_hash_algos = [algo for algo in hashlib.algorithms_available if not algo.startswith('shake')]
     # NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
     # so we return the SHA512 hashes by default
-    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
-    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')
-    def get(self, capture_uuid: str):
+    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')  # type: ignore[misc]
+    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
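
The comment above about shake algorithms can be checked in two lines: their digests take a mandatory length argument, so they cannot be swapped in transparently for sha512 (standalone sketch):

    import hashlib

    print(hashlib.sha512(b'payload').hexdigest())       # fixed-size digest
    print(hashlib.shake_128(b'payload').hexdigest(32))  # shake needs a length
    try:
        hashlib.shake_128(b'payload').hexdigest()  # type: ignore[call-arg]
    except TypeError as err:
        print(f'shake without a length fails: {err}')
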
@@ -131,7 +133,7 @@ class CaptureHashes(Resource):
         algorithm = request.args['algorithm'].lower() if request.args.get('algorithm') else 'sha512'
         hashes_only = False if 'hashes_only' in request.args and request.args['hashes_only'] in [0, '0'] else True
         if algorithm == 'sha512' and hashes_only:
-            to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
+            to_return: dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
         else:
             hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
             to_return = {'response': {'hashes': list(hashes.keys())}}
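
How the two parameters combine, as a request sketch (host and UUID are placeholders): the sha512/hashes_only defaults hit the precomputed fast path, anything else recomputes the digests on the fly:

    import requests

    capture_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder
    r = requests.get(f'http://127.0.0.1:5100/json/{capture_uuid}/hashes',
                     params={'algorithm': 'sha256', 'hashes_only': 0})
    print(r.json())
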
@@ -143,13 +145,13 @@ class CaptureHashes(Resource):
 @api.route('/json/<string:capture_uuid>/redirects')
 @api.doc(description='Get all the redirects of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureRedirects(Resource):
-    def get(self, capture_uuid: str):
+class CaptureRedirects(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
-        to_return: Dict[str, Any] = {}
+        to_return: dict[str, Any] = {}
         try:
             to_return = {'response': {'url': cache.url,
                                       'redirects': cache.redirects if cache.redirects else []}}
@@ -166,8 +168,8 @@ class CaptureRedirects(Resource):
 @api.route('/json/<string:capture_uuid>/misp_export')
 @api.doc(description='Get an export of the capture in MISP format',
          params={'capture_uuid': 'The UUID of the capture'})
-class MISPExport(Resource):
-    def get(self, capture_uuid: str):
+class MISPExport(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | list[dict[str, Any]]:
         with_parents = request.args.get('with_parents')
         event = lookyloo.misp_export(capture_uuid, True if with_parents else False)
         if isinstance(event, dict):
@@ -192,12 +194,12 @@ misp_push_fields = api.model('MISPPushFields', {
 @api.doc(description='Push an event to a pre-configured MISP instance',
          params={'capture_uuid': 'The UUID of the capture'},
          security='apikey')
-class MISPPush(Resource):
+class MISPPush(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]
-    @api.param('with_parents', 'Also push the parents of the capture (if any)')
-    @api.param('allow_duplicates', 'Push the event even if it is already present on the MISP instance')
-    def get(self, capture_uuid: str, instance_name: Optional[str]=None):
+    @api.param('with_parents', 'Also push the parents of the capture (if any)')  # type: ignore[misc]
+    @api.param('allow_duplicates', 'Push the event even if it is already present on the MISP instance')  # type: ignore[misc]
+    def get(self, capture_uuid: str, instance_name: str | None=None) -> dict[str, Any] | list[dict[str, Any]]:
         with_parents = True if request.args.get('with_parents') else False
         allow_duplicates = True if request.args.get('allow_duplicates') else False
@@ -208,7 +210,7 @@ class MISPPush(Resource):
         else:
             return {'error': f'MISP instance "{instance_name}" does not exists.'}
-        to_return: Dict = {}
+        to_return: dict[str, Any] = {}
         if not misp.available:
             to_return['error'] = 'MISP module not available.'
         elif not misp.enable_push:
@@ -229,9 +231,9 @@ class MISPPush(Resource):
         return to_return
-    @api.doc(body=misp_push_fields)
-    def post(self, capture_uuid: str, instance_name: Optional[str]=None):
-        parameters: Dict = request.get_json(force=True)
+    @api.doc(body=misp_push_fields)  # type: ignore[misc]
+    def post(self, capture_uuid: str, instance_name: str | None=None) -> dict[str, Any] | list[dict[str, Any]]:
+        parameters: dict[str, Any] = request.get_json(force=True)
         with_parents = True if parameters.get('with_parents') else False
         allow_duplicates = True if parameters.get('allow_duplicates') else False
         if instance_name is None:
@@ -241,7 +243,7 @@ class MISPPush(Resource):
         else:
             return {'error': f'MISP instance "{instance_name}" does not exists.'}
-        to_return: Dict = {}
+        to_return: dict[str, Any] = {}
         if not misp.available:
             to_return['error'] = 'MISP module not available.'
         elif not misp.enable_push:
@@ -272,10 +274,10 @@ trigger_modules_fields = api.model('TriggerModulesFields', {
 @api.route('/json/<string:capture_uuid>/trigger_modules')
 @api.doc(description='Trigger all the available 3rd party modules on the given capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class TriggerModules(Resource):
-    @api.doc(body=trigger_modules_fields)
-    def post(self, capture_uuid: str):
-        parameters: Dict = request.get_json(force=True)
+class TriggerModules(Resource):  # type: ignore[misc]
+    @api.doc(body=trigger_modules_fields)  # type: ignore[misc]
+    def post(self, capture_uuid: str) -> dict[str, Any]:
+        parameters: dict[str, Any] = request.get_json(force=True)
         force = True if parameters.get('force') else False
         return lookyloo.trigger_modules(capture_uuid, force=force)
@@ -283,12 +285,12 @@ class TriggerModules(Resource):
 @api.route('/json/hash_info/<h>')
 @api.doc(description='Search for a ressource with a specific hash (sha512)',
          params={'h': 'The hash (sha512)'})
-class HashInfo(Resource):
-    def get(self, h: str):
+class HashInfo(Resource):  # type: ignore[misc]
+    def get(self, h: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         details, body = lookyloo.get_body_hash_full(h)
         if not details:
             return {'error': 'Unknown Hash.'}, 400
-        to_return: Dict[str, Any] = {'response': {'hash': h, 'details': details,
+        to_return: dict[str, Any] = {'response': {'hash': h, 'details': details,
                                                   'body': base64.b64encode(body.getvalue()).decode()}}
         return to_return
@@ -302,11 +304,11 @@ url_info_fields = api.model('URLInfoFields', {
 @api.route('/json/url_info')
 @api.doc(description='Search for a URL')
-class URLInfo(Resource):
-    @api.doc(body=url_info_fields)
-    def post(self):
-        to_query: Dict = request.get_json(force=True)
+class URLInfo(Resource):  # type: ignore[misc]
+    @api.doc(body=url_info_fields)  # type: ignore[misc]
+    def post(self) -> list[dict[str, Any]]:
+        to_query: dict[str, Any] = request.get_json(force=True)
         occurrences = lookyloo.get_url_occurrences(to_query.pop('url'), **to_query)
         return occurrences
@@ -320,51 +322,50 @@ hostname_info_fields = api.model('HostnameInfoFields', {
 @api.route('/json/hostname_info')
 @api.doc(description='Search for a hostname')
-class HostnameInfo(Resource):
-    @api.doc(body=hostname_info_fields)
-    def post(self):
-        to_query: Dict = request.get_json(force=True)
-        occurrences = lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query)
-        return occurrences
+class HostnameInfo(Resource):  # type: ignore[misc]
+    @api.doc(body=hostname_info_fields)  # type: ignore[misc]
+    def post(self) -> list[dict[str, Any]]:
+        to_query: dict[str, Any] = request.get_json(force=True)
+        return lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query)
 @api.route('/json/stats')
 @api.doc(description='Get the statistics of the lookyloo instance.')
-class InstanceStats(Resource):
-    def get(self):
+class InstanceStats(Resource):  # type: ignore[misc]
+    def get(self) -> dict[str, Any]:
         return lookyloo.get_stats()
 @api.route('/json/devices')
 @api.doc(description='Get the list of devices pre-configured on the platform')
-class Devices(Resource):
-    def get(self):
+class Devices(Resource):  # type: ignore[misc]
+    def get(self) -> dict[str, Any]:
         return lookyloo.get_playwright_devices()
 @api.route('/json/<string:capture_uuid>/stats')
 @api.doc(description='Get the statistics of the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureStats(Resource):
-    def get(self, capture_uuid: str):
+class CaptureStats(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return lookyloo.get_statistics(capture_uuid)
 @api.route('/json/<string:capture_uuid>/info')
 @api.doc(description='Get basic information about the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureInfo(Resource):
-    def get(self, capture_uuid: str):
+class CaptureInfo(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return lookyloo.get_info(capture_uuid)
 @api.route('/json/<string:capture_uuid>/cookies')
 @api.doc(description='Get the complete cookie jar created during the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureCookies(Resource):
-    def get(self, capture_uuid: str):
+class CaptureCookies(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return json.loads(lookyloo.get_cookies(capture_uuid).read())
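
All of these classes follow the same flask_restx shape; reduced to a standalone skeleton (not the application's actual wiring), the framework routes the class and dispatches HTTP verbs to its `get()`/`post()` methods:

    from typing import Any
    from flask import Flask
    from flask_restx import Api, Resource  # type: ignore

    app = Flask(__name__)
    api = Api(app)

    @api.route('/json/<string:capture_uuid>/example')
    class Example(Resource):  # type: ignore[misc]
        def get(self, capture_uuid: str) -> dict[str, Any]:
            return {'capture': capture_uuid}
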
@@ -392,17 +393,17 @@ submit_fields_post = api.model('SubmitFieldsPost', {
 @api.route('/submit')
-class SubmitCapture(Resource):
-    @api.param('url', 'The URL to capture', required=True)
-    @api.param('listing', 'Display the capture on the index', default=1)
-    @api.param('user_agent', 'User agent to use for the capture')
-    @api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.')
-    @api.param('device_name', 'Use the pre-configured settings for this device')
-    @api.param('referer', 'Referer to pass to the capture')
-    @api.param('proxy', 'Proxy to use for the the capture')
-    @api.produces(['text/text'])
-    def get(self):
+class SubmitCapture(Resource):  # type: ignore[misc]
+    @api.param('url', 'The URL to capture', required=True)  # type: ignore[misc]
+    @api.param('listing', 'Display the capture on the index', default=1)  # type: ignore[misc]
+    @api.param('user_agent', 'User agent to use for the capture')  # type: ignore[misc]
+    @api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.')  # type: ignore[misc]
+    @api.param('device_name', 'Use the pre-configured settings for this device')  # type: ignore[misc]
+    @api.param('referer', 'Referer to pass to the capture')  # type: ignore[misc]
+    @api.param('proxy', 'Proxy to use for the the capture')  # type: ignore[misc]
+    @api.produces(['text/text'])  # type: ignore[misc]
+    def get(self) -> str | tuple[str, int]:
         if flask_login.current_user.is_authenticated:
             user = flask_login.current_user.get_id()
         else:
@@ -430,9 +431,9 @@ class SubmitCapture(Resource):
         perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
         return perma_uuid
-    @api.doc(body=submit_fields_post)
-    @api.produces(['text/text'])
-    def post(self):
+    @api.doc(body=submit_fields_post)  # type: ignore[misc]
+    @api.produces(['text/text'])  # type: ignore[misc]
+    def post(self) -> str:
         if flask_login.current_user.is_authenticated:
             user = flask_login.current_user.get_id()
         else:
@@ -447,30 +448,30 @@ class SubmitCapture(Resource):
 @api.route('/bin/<string:capture_uuid>/screenshot')
 @api.doc(description='Get the screenshot associated to the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureScreenshot(Resource):
-    @api.produces(['image/png'])
-    def get(self, capture_uuid: str):
+class CaptureScreenshot(Resource):  # type: ignore[misc]
+    @api.produces(['image/png'])  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> Response:
         return send_file(lookyloo.get_screenshot(capture_uuid), mimetype='image/png')
 @api.route('/bin/<string:capture_uuid>/export')
 @api.doc(description='Get all the files generated by the capture, except the pickle.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureExport(Resource):
-    @api.produces(['application/zip'])
-    def get(self, capture_uuid: str):
+class CaptureExport(Resource):  # type: ignore[misc]
+    @api.produces(['application/zip'])  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> Response:
         return send_file(lookyloo.get_capture(capture_uuid), mimetype='application/zip')
 @api.route('/bin/<string:capture_uuid>/data')
 @api.doc(description='Get the file downloaded by the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureData(Resource):
-    @api.produces(['application/zip'])
-    def get(self, capture_uuid: str):
+class CaptureData(Resource):  # type: ignore[misc]
+    @api.produces(['application/zip'])  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> Response:
         filename, data = lookyloo.get_data(capture_uuid)
         if not filename:
             # This capture didn't trigger a download.
@@ -499,10 +500,10 @@ compare_captures_fields = api.model('CompareCapturesFields', {
 @api.route('/json/compare_captures')
 @api.doc(description='Compare two captures')
-class CompareCaptures(Resource):
-    @api.doc(body=compare_captures_fields)
-    def post(self):
-        parameters: Dict = request.get_json(force=True)
+class CompareCaptures(Resource):  # type: ignore[misc]
+    @api.doc(body=compare_captures_fields)  # type: ignore[misc]
+    def post(self) -> dict[str, Any]:
+        parameters: dict[str, Any] = request.get_json(force=True)
         left_uuid = parameters.get('capture_left')
         right_uuid = parameters.get('capture_right')
         if not left_uuid or not right_uuid:
@@ -545,10 +546,10 @@ comparables_model = api.model('ComparablesModel', {
 @api.route('/json/<string:capture_uuid>/comparables')
 @api.doc(description='Get the data we can compare across captures')
-class Comparables(Resource):
-    @api.marshal_with(comparables_model)
-    def get(self, capture_uuid: str):
+class Comparables(Resource):  # type: ignore[misc]
+    @api.marshal_with(comparables_model)  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return comparator.get_comparables_capture(capture_uuid)
@@ -561,10 +562,10 @@ takedown_fields = api.model('TakedownFields', {
 @api.route('/json/takedown')
 @api.doc(description='Get information for triggering a takedown request')
-class Takedown(Resource):
-    @api.doc(body=takedown_fields)
-    def post(self):
-        parameters: Dict = request.get_json(force=True)
+class Takedown(Resource):  # type: ignore[misc]
+    @api.doc(body=takedown_fields)  # type: ignore[misc]
+    def post(self) -> list[dict[str, Any]] | dict[str, str]:
+        parameters: dict[str, Any] = request.get_json(force=True)
         capture_uuid = parameters.get('capture_uuid')
         if not capture_uuid:
             return {'error': f'Invalid request: {parameters}'}
@@ -576,10 +577,10 @@ class Takedown(Resource):
 @api.route('/admin/rebuild_all')
 @api.doc(description='Rebuild all the trees. WARNING: IT IS GOING TO TAKE A VERY LONG TIME.',
          security='apikey')
-class RebuildAll(Resource):
+class RebuildAll(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]
-    def post(self):
+    def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.rebuild_all()
         except Exception as e:
@@ -591,10 +592,10 @@ class RebuildAll(Resource):
 @api.route('/admin/rebuild_all_cache')
 @api.doc(description='Rebuild all the caches. It will take a while, but less that rebuild all.',
          security='apikey')
-class RebuildAllCache(Resource):
+class RebuildAllCache(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]
-    def post(self):
+    def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.rebuild_cache()
         except Exception as e:
@@ -607,10 +608,10 @@ class RebuildAllCache(Resource):
 @api.doc(description='Rebuild the tree.',
          params={'capture_uuid': 'The UUID of the capture'},
          security='apikey')
-class CaptureRebuildTree(Resource):
+class CaptureRebuildTree(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]
-    def post(self, capture_uuid):
+    def post(self, capture_uuid: str) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.remove_pickle(capture_uuid)
             lookyloo.get_crawled_tree(capture_uuid)
@@ -624,10 +625,10 @@ class CaptureRebuildTree(Resource):
 @api.doc(description='Hide the capture from the index.',
          params={'capture_uuid': 'The UUID of the capture'},
          security='apikey')
-class CaptureHide(Resource):
+class CaptureHide(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]
-    def post(self, capture_uuid):
+    def post(self, capture_uuid: str) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.hide_capture(capture_uuid)
         except Exception as e:
View File
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Dict, List, Union
 import flask_login  # type: ignore
+from flask import Request
 from werkzeug.security import generate_password_hash
 from lookyloo.default import get_config, get_homedir
@@ -23,7 +24,7 @@ def get_lookyloo_instance() -> Lookyloo:
     return __global_lookyloo_instance
-def src_request_ip(request) -> str:
+def src_request_ip(request: Request) -> str | None:
     # NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers.
     real_ip = request.headers.get('X-Real-IP')
     if not real_ip:
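
The header fallback above can be exercised with werkzeug's test helpers (a standalone check, not part of the diff): X-Real-IP only exists when a reverse proxy injects it, so a bare request takes the fallback path.

    from werkzeug.test import EnvironBuilder
    from werkzeug.wrappers import Request

    env = EnvironBuilder(path='/', headers={'X-Real-IP': '203.0.113.7'}).get_environ()
    assert Request(env).headers.get('X-Real-IP') == '203.0.113.7'

    env = EnvironBuilder(path='/').get_environ()
    assert Request(env).headers.get('X-Real-IP') is None
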
@@ -31,11 +32,11 @@ def src_request_ip(request) -> str:
     return real_ip
-class User(flask_login.UserMixin):
+class User(flask_login.UserMixin):  # type: ignore[misc]
     pass
-def load_user_from_request(request):
+def load_user_from_request(request: Request) -> User | None:
     api_key = request.headers.get('Authorization')
     if not api_key:
         return None