mirror of https://github.com/CIRCL/lookyloo
chg: Use new annotations
parent 0b5128e5b4
commit ee1ad48b25
@@ -3,14 +3,14 @@
 exclude: "user_agents|website/web/sri.txt"
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
   - repo: https://github.com/asottile/pyupgrade
-    rev: v2.31.1
+    rev: v3.15.0
    hooks:
      - id: pyupgrade
        args: [--py38-plus]
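Context for the two `rev` bumps above: `pyupgrade` with `--py38-plus` is the hook that mechanically rewrites legacy `typing` constructs into the newer annotation style applied throughout the rest of this commit. A minimal sketch of the rewrite it performs (hypothetical module, not from this repository):

from __future__ import annotations  # annotations become lazily-evaluated strings (PEP 563)

# Before: from typing import Dict, Optional
#         def lookup(cache: Dict[str, str], key: str) -> Optional[str]: ...
# After pyupgrade --py38-plus, with the future import in place:
def lookup(cache: dict[str, str], key: str) -> str | None:
    # Builtin generics (PEP 585) and X | Y unions (PEP 604) are safe here on
    # Python 3.8 because the annotations are never evaluated at runtime.
    return cache.get(key)

print(lookup({'capture': 'uuid'}, 'capture'))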
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import csv
 import gzip
 import logging
@@ -23,7 +25,7 @@ logging.config.dictConfig(get_config('logging'))
 
 class Archiver(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'archiver'
         self.redis = Redis(unix_socket_path=get_socket_path('cache'))
@@ -54,7 +56,7 @@ class Archiver(AbstractManager):
            self.s3fs_bucket = s3fs_config['config']['bucket_name']
            self.s3fs_client.clear_multipart_uploads(self.s3fs_bucket)
 
-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
        archiving_done = False
        # NOTE: When we archive a big directory, moving *a lot* of files, expecially to MinIO
        # can take a very long time. In order to avoid being stuck on the archiving, we break that in chunks
@@ -71,14 +73,14 @@ class Archiver(AbstractManager):
            # This call takes a very long time on MinIO
            self._update_all_capture_indexes()
 
-    def _update_index(self, root_dir: Path, *, s3fs_parent_dir: Optional[str]=None) -> Optional[Path]:
+    def _update_index(self, root_dir: Path, *, s3fs_parent_dir: str | None=None) -> Path | None:
        # returns a path to the index for the given directory
        logmsg = f'Updating index for {root_dir}'
        if s3fs_parent_dir:
            logmsg = f'{logmsg} (s3fs)'
        self.logger.info(logmsg)
 
-        current_index: Dict[str, str] = {}
+        current_index: dict[str, str] = {}
        index_file = root_dir / 'index'
        if index_file.exists():
            try:
@@ -91,11 +93,11 @@ class Archiver(AbstractManager):
                # NOTE: should we remove if it has subs?
                index_file.unlink()
 
-        sub_indexes: List[Path] = []
-        current_index_dirs: Set[str] = set(current_index.values())
-        new_captures: Set[Path] = set()
+        sub_indexes: list[Path] = []
+        current_index_dirs: set[str] = set(current_index.values())
+        new_captures: set[Path] = set()
        # Directories that are actually in the listing.
-        current_dirs: Set[str] = set()
+        current_dirs: set[str] = set()
 
        if s3fs_parent_dir:
            s3fs_dir = '/'.join([s3fs_parent_dir, root_dir.name])
@@ -212,7 +214,7 @@ class Archiver(AbstractManager):
 
        return index_file
 
-    def _update_all_capture_indexes(self, *, recent_only: bool=False):
+    def _update_all_capture_indexes(self, *, recent_only: bool=False) -> None:
        '''Run that after the captures are in the proper directories'''
        # Recent captures
        self.logger.info('Update recent indexes')
@@ -278,7 +280,7 @@ class Archiver(AbstractManager):
 
        return dest_dir / capture_path.name
 
-    def _archive(self):
+    def _archive(self) -> bool:
        archive_interval = timedelta(days=get_config('generic', 'archive'))
        cut_time = (datetime.now() - archive_interval)
        self.logger.info(f'Archiving all captures older than {cut_time.isoformat()}.')
@@ -340,7 +342,7 @@ class Archiver(AbstractManager):
        self.logger.info('Archiving done.')
        return archiving_done
 
-    def __load_index(self, index_path: Path, ignore_sub: bool=False) -> Dict[str, str]:
+    def __load_index(self, index_path: Path, ignore_sub: bool=False) -> dict[str, str]:
        '''Loads the given index file and all the subsequent ones if they exist'''
        # NOTE: this method is used on recent and archived captures, it must never trigger a dir listing
        indexed_captures = {}
@@ -359,7 +361,7 @@ class Archiver(AbstractManager):
            indexed_captures[key] = str(index_path.parent / path_name)
        return indexed_captures
 
-    def _load_indexes(self):
+    def _load_indexes(self) -> None:
        # capture_dir / Year / Month / index <- should always exists. If not, created by _update_index
        # Initialize recent index
        for index in sorted(get_captures_dir().glob('*/*/index'), reverse=True):
@@ -391,7 +393,7 @@ class Archiver(AbstractManager):
        self.logger.info(f'Archived indexes loaded: {total_archived_captures} entries.')
 
 
-def main():
+def main() -> None:
    a = Archiver()
    a.run(sleep_in_sec=3600)
 
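Every Python file in this commit gains the same two added lines at the top: `from __future__ import annotations` (PEP 563). That import is what lets the hunks above and below annotate with `int | None` or `dict[str, str]` while the project still targets Python 3.8: annotations are stored as plain strings and never evaluated at definition time. A standalone sketch of the behaviour (not from this repository):

from __future__ import annotations

def archive(age_days: int | None = None) -> list[str]:
    # Without the future import, `int | None` and `list[str]` would raise a
    # TypeError at definition time on Python 3.8.
    return [str(age_days)]

print(archive.__annotations__)  # {'age_days': 'int | None', 'return': 'list[str]'}

The limit is visible in the hunks too: the new syntax only helps in annotation position, so objects used at runtime (e.g. the `Redis` and `Popen` generics) still need `# type: ignore[...]` escape hatches.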
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import asyncio
 import json
 import logging
@@ -10,7 +12,7 @@ from pathlib import Path
 from typing import Optional, Set, Union
 
 from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
-from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
+from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy  # type: ignore[attr-defined]
 
 from lookyloo.lookyloo import Lookyloo, CaptureSettings
 from lookyloo.default import AbstractManager, get_config
@@ -23,7 +25,7 @@ logging.config.dictConfig(get_config('logging'))
 
 class AsyncCapture(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None) -> None:
        super().__init__(loglevel)
        self.script_name = 'async_capture'
        self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
@@ -31,7 +33,7 @@ class AsyncCapture(AbstractManager):
        self.lookyloo = Lookyloo()
 
        if isinstance(self.lookyloo.lacus, LacusCore):
-            self.captures: Set[asyncio.Task] = set()
+            self.captures: set[asyncio.Task] = set()  # type: ignore[type-arg]
 
        self.fox = FOX(config_name='FOX')
        if not self.fox.available:
@@ -41,23 +43,24 @@ class AsyncCapture(AbstractManager):
        if self.fox.available:
            self.fox.capture_default_trigger(url, auto_trigger=True)
 
-    async def _trigger_captures(self):
+    async def _trigger_captures(self) -> None:
        # Only called if LacusCore is used
        max_new_captures = get_config('generic', 'async_capture_processes') - len(self.captures)
        self.logger.debug(f'{len(self.captures)} ongoing captures.')
        if max_new_captures <= 0:
            self.logger.info(f'Max amount of captures in parallel reached ({len(self.captures)})')
-            return
-        for capture_task in self.lookyloo.lacus.consume_queue(max_new_captures):
+            return None
+        for capture_task in self.lookyloo.lacus.consume_queue(max_new_captures):  # type: ignore[union-attr]
            self.captures.add(capture_task)
            capture_task.add_done_callback(self.captures.discard)
 
-    def uuids_ready(self):
+    def uuids_ready(self) -> list[str]:
        return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf')
                if uuid and self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.DONE, CaptureStatusCore.DONE]]
 
    def process_capture_queue(self) -> None:
        '''Process a query from the capture queue'''
-        entries: Union[CaptureResponseCore, CaptureResponsePy]
+        entries: CaptureResponseCore | CaptureResponsePy
        for uuid in self.uuids_ready():
            if isinstance(self.lookyloo.lacus, LacusCore):
                entries = self.lookyloo.lacus.get_capture(uuid, decode=True)
@@ -71,9 +74,9 @@ class AsyncCapture(AbstractManager):
            self.logger.info(log)
 
            self.lookyloo.redis.sadd('ongoing', uuid)
-            queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
+            queue: str | None = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
 
-            to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)
+            to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)  # type: ignore[assignment]
 
            if get_config('generic', 'default_public'):
                # By default, the captures are on the index, unless the user mark them as un-listed
@@ -123,9 +126,9 @@ class AsyncCapture(AbstractManager):
        self.unset_running()
        self.logger.info(f'Done with {uuid}')
 
-    async def _to_run_forever_async(self):
+    async def _to_run_forever_async(self) -> None:
        if self.force_stop:
-            return
+            return None
 
        if isinstance(self.lookyloo.lacus, LacusCore):
            await self._trigger_captures()
@@ -135,7 +138,7 @@ class AsyncCapture(AbstractManager):
 
        self.process_capture_queue()
 
-    async def _wait_to_finish_async(self):
+    async def _wait_to_finish_async(self) -> None:
        if isinstance(self.lookyloo.lacus, LacusCore):
            while self.captures:
                self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
@@ -147,7 +150,7 @@ class AsyncCapture(AbstractManager):
        self.logger.info('No more captures')
 
 
-def main():
+def main() -> None:
    m = AsyncCapture()
 
    loop = asyncio.new_event_loop()
 
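`_trigger_captures` above uses the standard asyncio pattern for fire-and-forget tasks: hold a strong reference in a set so the event loop cannot garbage-collect a running task, and let a done-callback remove it. A reduced, runnable sketch of the same pattern (names invented):

from __future__ import annotations

import asyncio

ongoing: set[asyncio.Task] = set()

async def capture(n: int) -> None:
    await asyncio.sleep(0.1)  # stands in for a real capture

async def main() -> None:
    for n in range(3):
        task = asyncio.create_task(capture(n))
        ongoing.add(task)                        # strong reference, like self.captures.add()
        task.add_done_callback(ongoing.discard)  # self-cleaning, as in AsyncCapture
    while ongoing:                               # mirrors the wait loop in _wait_to_finish_async
        await asyncio.sleep(0.05)

asyncio.run(main())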
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import logging
 import logging.config
 import os
@@ -20,7 +22,7 @@ logging.config.dictConfig(get_config('logging'))
 
 class BackgroundIndexer(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
        super().__init__(loglevel)
        self.lookyloo = Lookyloo()
        self.script_name = 'background_indexer'
@@ -28,7 +30,7 @@ class BackgroundIndexer(AbstractManager):
        self.discarded_captures_dir = self.lookyloo.capture_dir.parent / 'discarded_captures'
        self.discarded_captures_dir.mkdir(parents=True, exist_ok=True)
 
-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
        all_done = self._build_missing_pickles()
        if all_done:
            self._check_indexes()
@@ -72,7 +74,7 @@ class BackgroundIndexer(AbstractManager):
                # The capture with this UUID exists, but it is for some reason missing in lookup_dirs
                self.lookyloo.redis.hset('lookup_dirs', uuid, str(path))
            else:
-                cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid))
+                cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid))  # type: ignore[arg-type]
                if cached_path != path:
                    # we have a duplicate UUID, it is proably related to some bad copy/paste
                    if cached_path.exists():
@@ -118,13 +120,13 @@ class BackgroundIndexer(AbstractManager):
            return True
        return False
 
-    def _check_indexes(self):
+    def _check_indexes(self) -> None:
        index_redis = self.lookyloo.indexing.redis
        can_index = index_redis.set('ongoing_indexing', 1, ex=3600, nx=True)
        if not can_index:
            # There is no reason to run this method in multiple scripts.
            self.logger.info('Indexing already ongoing in another process.')
-            return
+            return None
        self.logger.info('Check indexes...')
        for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False):
            if self.lookyloo.is_public_instance and cache.no_index:
@@ -163,7 +165,7 @@ class BackgroundIndexer(AbstractManager):
        self.logger.info('... done.')
 
 
-def main():
+def main() -> None:
    i = BackgroundIndexer()
    i.run(sleep_in_sec=60)
 
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import json
 import time
 import logging
@@ -8,7 +10,7 @@ from collections import Counter
 from datetime import date, timedelta
 from typing import Any, Dict, Optional
 
-from lookyloo.lookyloo import Lookyloo, CaptureStatusCore, CaptureStatusPy
+from lookyloo.lookyloo import Lookyloo, CaptureStatusCore, CaptureStatusPy  # type: ignore[attr-defined]
 from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
 from lookyloo.helpers import ParsedUserAgent, serialize_to_json
 
@@ -17,19 +19,19 @@ logging.config.dictConfig(get_config('logging'))
 
 class Processing(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
        super().__init__(loglevel)
        self.script_name = 'processing'
        self.lookyloo = Lookyloo()
 
        self.use_own_ua = get_config('generic', 'use_user_agents_users')
 
-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
        if self.use_own_ua:
            self._build_ua_file()
        self._retry_failed_enqueue()
 
-    def _build_ua_file(self):
+    def _build_ua_file(self) -> None:
        '''Build a file in a format compatible with the capture page'''
        yesterday = (date.today() - timedelta(days=1))
        self_generated_ua_file_path = get_homedir() / 'own_user_agents' / str(yesterday.year) / f'{yesterday.month:02}'
@@ -44,7 +46,7 @@ class Processing(AbstractManager):
            self.logger.info(f'No User-agent file for {yesterday} to generate.')
            return
 
-        to_store: Dict[str, Any] = {'by_frequency': []}
+        to_store: dict[str, Any] = {'by_frequency': []}
        uas = Counter([entry.split('|', 1)[1] for entry in entries])
        for ua, _ in uas.most_common():
            parsed_ua = ParsedUserAgent(ua)
@@ -71,7 +73,7 @@ class Processing(AbstractManager):
        self.lookyloo.redis.delete(f'user_agents|{yesterday.isoformat()}')
        self.logger.info(f'User-agent file for {yesterday} generated.')
 
-    def _retry_failed_enqueue(self):
+    def _retry_failed_enqueue(self) -> None:
        '''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
        for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
            try_reenqueue = False
@@ -131,7 +133,7 @@ class Processing(AbstractManager):
            self.logger.info(f'{uuid} enqueued.')
 
 
-def main():
+def main() -> None:
    p = Processing()
    p.run(sleep_in_sec=30)
 
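For context on `_build_ua_file` above: it reduces a day's worth of Redis entries to a frequency-sorted user-agent list via `collections.Counter`. Roughly like this sketch — the `'<id>|<user-agent>'` entry format is inferred from the `entry.split('|', 1)[1]` call, and the sample data is made up:

from __future__ import annotations

from collections import Counter
from typing import Any

entries = [
    'uuid-1|Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0',
    'uuid-2|Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0',
    'uuid-3|curl/8.5.0',
]
uas = Counter(entry.split('|', 1)[1] for entry in entries)
to_store: dict[str, Any] = {'by_frequency': [ua for ua, _ in uas.most_common()]}
print(to_store['by_frequency'][0])  # most frequent user agent first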
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import argparse
 import os
 import time
@@ -24,14 +26,14 @@ def check_running(name: str) -> bool:
        return False
 
 
-def launch_cache(storage_directory: Optional[Path]=None):
+def launch_cache(storage_directory: Path | None=None) -> None:
    if not storage_directory:
        storage_directory = get_homedir()
    if not check_running('cache'):
        Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache'))
 
 
-def shutdown_cache(storage_directory: Optional[Path]=None):
+def shutdown_cache(storage_directory: Path | None=None) -> None:
    if not storage_directory:
        storage_directory = get_homedir()
    r = Redis(unix_socket_path=get_socket_path('cache'))
@@ -39,14 +41,14 @@ def shutdown_cache(storage_directory: Optional[Path]=None):
    print('Redis cache database shutdown.')
 
 
-def launch_indexing(storage_directory: Optional[Path]=None):
+def launch_indexing(storage_directory: Path | None=None) -> None:
    if not storage_directory:
        storage_directory = get_homedir()
    if not check_running('indexing'):
        Popen(["./run_redis.sh"], cwd=(storage_directory / 'indexing'))
 
 
-def shutdown_indexing(storage_directory: Optional[Path]=None):
+def shutdown_indexing(storage_directory: Path | None=None) -> None:
    if not storage_directory:
        storage_directory = get_homedir()
    r = Redis(unix_socket_path=get_socket_path('indexing'))
@@ -54,13 +56,13 @@ def shutdown_indexing(storage_directory: Optional[Path]=None):
    print('Redis indexing database shutdown.')
 
 
-def launch_all():
+def launch_all() -> None:
    launch_cache()
    launch_indexing()
 
 
-def check_all(stop: bool=False):
-    backends: Dict[str, bool] = {'cache': False, 'indexing': False}
+def check_all(stop: bool=False) -> None:
+    backends: dict[str, bool] = {'cache': False, 'indexing': False}
    while True:
        for db_name in backends.keys():
            try:
@@ -81,12 +83,12 @@ def check_all(stop: bool=False):
        time.sleep(1)
 
 
-def stop_all():
+def stop_all() -> None:
    shutdown_cache()
    shutdown_indexing()
 
 
-def main():
+def main() -> None:
    parser = argparse.ArgumentParser(description='Manage backend DBs.')
    parser.add_argument("--start", action='store_true', default=False, help="Start all")
    parser.add_argument("--stop", action='store_true', default=False, help="Stop all")
 
@@ -5,7 +5,7 @@ import time
 from lookyloo.default import AbstractManager
 
 
-def main():
+def main() -> None:
    AbstractManager.force_shutdown()
    time.sleep(5)
    while True:
 
@@ -5,7 +5,7 @@ from subprocess import Popen, run
 from lookyloo.default import get_homedir
 
 
-def main():
+def main() -> None:
    # Just fail if the env isn't set.
    get_homedir()
    print('Start backend (redis)...')
 
@@ -13,13 +13,13 @@ logging.config.dictConfig(get_config('logging'))
 
 class Website(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: Optional[int]=None) -> None:
        super().__init__(loglevel)
        self.script_name = 'website'
-        self.process = self._launch_website()
+        self.process: Popen = self._launch_website()  # type: ignore[type-arg]
        self.set_running()
 
-    def _launch_website(self):
+    def _launch_website(self) -> Popen:  # type: ignore[type-arg]
        website_dir = get_homedir() / 'website'
        ip = get_config('generic', 'website_listen_ip')
        port = get_config('generic', 'website_listen_port')
@@ -32,7 +32,7 @@ class Website(AbstractManager):
                     cwd=website_dir)
 
 
-def main():
+def main() -> None:
    w = Website()
    w.run(sleep_in_sec=10)
 
@@ -8,7 +8,7 @@ from redis.exceptions import ConnectionError
 from lookyloo.default import get_homedir, get_socket_path
 
 
-def main():
+def main() -> None:
    get_homedir()
    p = Popen(['shutdown'])
    p.wait()
 
@@ -15,14 +15,14 @@ from lookyloo.default import get_homedir, get_config
 logging.config.dictConfig(get_config('logging'))
 
 
-def compute_hash_self():
+def compute_hash_self() -> bytes:
    m = hashlib.sha256()
    with (get_homedir() / 'bin' / 'update.py').open('rb') as f:
        m.update(f.read())
        return m.digest()
 
 
-def keep_going(ignore=False):
+def keep_going(ignore: bool=False) -> None:
    if ignore:
        return
    keep_going = input('Continue? (y/N) ')
@@ -31,7 +31,7 @@ def keep_going(ignore=False):
        sys.exit()
 
 
-def run_command(command, expect_fail: bool=False, capture_output: bool=True):
+def run_command(command: str, expect_fail: bool=False, capture_output: bool=True) -> None:
    args = shlex.split(command)
    homedir = get_homedir()
    process = subprocess.run(args, cwd=homedir, capture_output=capture_output)
@@ -42,7 +42,7 @@ def run_command(command, expect_fail: bool=False, capture_output: bool=True):
        sys.exit()
 
 
-def check_poetry_version():
+def check_poetry_version() -> None:
    args = shlex.split("poetry self -V")
    homedir = get_homedir()
    process = subprocess.run(args, cwd=homedir, capture_output=True)
@@ -58,7 +58,7 @@ def check_poetry_version():
        sys.exit()
 
 
-def main():
+def main() -> None:
    parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.')
    parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.')
    args = parser.parse_args()
 
@@ -1,3 +1,8 @@
+import logging
+
 from .lookyloo import Lookyloo  # noqa
 from .indexing import Indexing  # noqa
 
+logging.getLogger(__name__).addHandler(logging.NullHandler())
+
+__all__ = ['Lookyloo', 'Indexing']
 
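The rewritten package `__init__.py` above follows the standard convention for libraries that log: attach a `NullHandler` so applications that import the package but never configure logging see no spurious output, and declare the public names in `__all__`. The handler's effect, in a standalone sketch (the 'mylib' logger name is invented):

import logging

lib_logger = logging.getLogger('mylib')
lib_logger.addHandler(logging.NullHandler())  # what the package does for itself

lib_logger.warning('swallowed: the application has not configured logging')
logging.basicConfig(level=logging.INFO)       # the application opts in
lib_logger.warning('emitted through the root handler')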
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import contextlib
 import gzip
 import json
@@ -13,15 +15,15 @@ import time
 
 from collections.abc import Mapping
 from datetime import datetime
-from functools import lru_cache
+from functools import lru_cache, _CacheInfo as CacheInfo
 from logging import Logger, LoggerAdapter
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, Set, MutableMapping
+from typing import Any, Dict, List, Optional, Tuple, Union, Set, MutableMapping, Iterator
 
 import dns.rdatatype
 import dns.resolver
-from har2tree import CrawledTree, Har2TreeError, HarFile
-from pyipasnhistory import IPASNHistory
+from har2tree import CrawledTree, Har2TreeError, HarFile  # type: ignore[attr-defined]
+from pyipasnhistory import IPASNHistory  # type: ignore[attr-defined]
 from redis import Redis
 
 from .context import Context
@@ -32,11 +34,11 @@ from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, Tr
 from .modules import Cloudflare
 
 
-class LookylooCacheLogAdapter(LoggerAdapter):
+class LookylooCacheLogAdapter(LoggerAdapter):  # type: ignore[type-arg]
    """
    Prepend log entry with the UUID of the capture
    """
-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> Tuple[str, MutableMapping[str, Any]]:
+    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, MutableMapping[str, Any]]:
        if self.extra:
            return '[{}] {}'.format(self.extra['uuid'], msg), kwargs
        return msg, kwargs
@@ -47,10 +49,10 @@ class CaptureCache():
                 'error', 'no_index', 'categories', 'parent',
                 'user_agent', 'referer', 'logger')
 
-    def __init__(self, cache_entry: Dict[str, Any]):
+    def __init__(self, cache_entry: dict[str, Any]):
        logger = logging.getLogger(f'{self.__class__.__name__}')
        logger.setLevel(get_config('generic', 'loglevel'))
-        __default_cache_keys: Tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
+        __default_cache_keys: tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
                                                                      'url', 'redirects', 'capture_dir')
        if 'uuid' not in cache_entry or 'capture_dir' not in cache_entry:
            raise LookylooException(f'The capture is deeply broken: {cache_entry}')
@@ -80,16 +82,16 @@ class CaptureCache():
            # If the microsecond is missing (0), it fails
            self.timestamp = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S%z')
 
-        self.redirects: List[str] = json.loads(cache_entry['redirects']) if cache_entry.get('redirects') else []
+        self.redirects: list[str] = json.loads(cache_entry['redirects']) if cache_entry.get('redirects') else []
 
        # Error without all the keys in __default_cache_keys was fatal.
        # if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
-        self.error: Optional[str] = cache_entry.get('error')
+        self.error: str | None = cache_entry.get('error')
        self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
-        self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
-        self.parent: Optional[str] = cache_entry.get('parent')
-        self.user_agent: Optional[str] = cache_entry.get('user_agent')
-        self.referer: Optional[str] = cache_entry.get('referer')
+        self.categories: list[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
+        self.parent: str | None = cache_entry.get('parent')
+        self.user_agent: str | None = cache_entry.get('user_agent')
+        self.referer: str | None = cache_entry.get('referer')
 
    @property
    def tree(self) -> CrawledTree:
@@ -142,26 +144,26 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> C
    raise NoValidHarFile("Couldn't find HAR files")
 
 
-def serialize_sets(obj):
+def serialize_sets(obj: Any) -> Any:
    if isinstance(obj, set):
        return list(obj)
 
    return obj
 
 
-class CapturesIndex(Mapping):
+class CapturesIndex(Mapping):  # type: ignore[type-arg]
 
-    def __init__(self, redis: Redis, contextualizer: Optional[Context]=None):
+    def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None:  # type: ignore[type-arg]
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(get_config('generic', 'loglevel'))
        self.redis = redis
        self.indexing = Indexing()
        self.contextualizer = contextualizer
-        self.__cache: Dict[str, CaptureCache] = {}
+        self.__cache: dict[str, CaptureCache] = {}
        self._quick_init()
        self.timeout = get_config('generic', 'max_tree_create_time')
        try:
-            self.ipasnhistory: Optional[IPASNHistory] = IPASNHistory()
+            self.ipasnhistory: IPASNHistory | None = IPASNHistory()
            if not self.ipasnhistory.is_up:
                self.ipasnhistory = None
        except Exception as e:
@@ -169,7 +171,7 @@ class CapturesIndex(Mapping):
            self.logger.warning(f'Unable to setup IPASN History: {e}')
            self.ipasnhistory = None
        try:
-            self.cloudflare: Optional[Cloudflare] = Cloudflare()
+            self.cloudflare: Cloudflare | None = Cloudflare()
            if not self.cloudflare.available:
                self.cloudflare = None
        except Exception as e:
@@ -177,7 +179,7 @@ class CapturesIndex(Mapping):
            self.cloudflare = None
 
    @property
-    def cached_captures(self) -> Set[str]:
+    def cached_captures(self) -> set[str]:
        self._quick_init()
        return set(self.__cache.keys())
 
@@ -199,10 +201,10 @@ class CapturesIndex(Mapping):
            self.__cache[uuid] = self._set_capture_cache(capture_dir)
        return self.__cache[uuid]
 
-    def __iter__(self):
-        return iter(self.__cache)
+    def __iter__(self) -> Iterator[dict[str, CaptureCache]]:
+        return iter(self.__cache)  # type: ignore[arg-type]
 
-    def __len__(self):
+    def __len__(self) -> int:
        return len(self.__cache)
 
    def reload_cache(self, uuid: str) -> None:
@@ -221,7 +223,7 @@ class CapturesIndex(Mapping):
        self.redis.flushdb()
        self.__cache = {}
 
-    def lru_cache_status(self):
+    def lru_cache_status(self) -> CacheInfo:
        return load_pickle_tree.cache_info()
 
    def _quick_init(self) -> None:
@@ -332,11 +334,11 @@ class CapturesIndex(Mapping):
        return tree
 
    @staticmethod
-    def _raise_timeout(_, __):
+    def _raise_timeout(_, __) -> None:  # type: ignore[no-untyped-def]
        raise TimeoutError
 
    @contextlib.contextmanager
-    def _timeout_context(self):
+    def _timeout_context(self) -> Iterator[None]:
        if self.timeout != 0:
            # Register a function to raise a TimeoutError on the signal.
            signal.signal(signal.SIGALRM, self._raise_timeout)
@@ -378,7 +380,7 @@ class CapturesIndex(Mapping):
            logger.warning(f'Unable to rebuild the tree for {capture_dir}, the HAR files are broken.')
            tree = None
 
-        cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
+        cache: dict[str, str | int] = {'uuid': uuid, 'capture_dir': capture_dir_str}
        if capture_settings.get('url'):
            cache['url'] = capture_settings['url']
 
@@ -450,18 +452,18 @@ class CapturesIndex(Mapping):
            p.execute()
        return CaptureCache(cache)
 
-    def __resolve_dns(self, ct: CrawledTree, logger: LookylooCacheLogAdapter):
+    def __resolve_dns(self, ct: CrawledTree, logger: LookylooCacheLogAdapter) -> CrawledTree:
        '''Resolves all domains of the tree, keeps A (IPv4), AAAA (IPv6), and CNAME entries
        and store them in ips.json and cnames.json, in the capture directory.
        Updates the nodes of the tree accordingly so the information is available.
        '''
 
-        def _build_cname_chain(known_cnames: Dict[str, str], hostname) -> List[str]:
+        def _build_cname_chain(known_cnames: dict[str, str], hostname: str) -> list[str]:
            '''Returns a list of CNAMEs starting from one hostname.
            The CNAMEs resolutions are made in `_resolve_dns`. A hostname can have a CNAME entry
            and the CNAME entry can have an other CNAME entry, and so on multiple times.
            This method loops over the hostnames until there are no CNAMES.'''
-            cnames: List[str] = []
+            cnames: list[str] = []
            to_search = hostname
            while True:
                if not known_cnames.get(to_search):
@@ -474,7 +476,7 @@ class CapturesIndex(Mapping):
        ips_path = ct.root_hartree.har.path.parent / 'ips.json'
        ipasn_path = ct.root_hartree.har.path.parent / 'ipasn.json'
 
-        host_cnames: Dict[str, str] = {}
+        host_cnames: dict[str, str] = {}
        if cnames_path.exists():
            try:
                with cnames_path.open() as f:
@@ -483,7 +485,7 @@ class CapturesIndex(Mapping):
                # The json is broken, delete and re-trigger the requests
                host_cnames = {}
 
-        host_ips: Dict[str, Dict[str, Set[str]]] = {}
+        host_ips: dict[str, dict[str, set[str]]] = {}
        if ips_path.exists():
            try:
                with ips_path.open() as f:
@@ -492,7 +494,7 @@ class CapturesIndex(Mapping):
                # The json is broken, delete and re-trigger the requests
                host_ips = {}
 
-        ipasn: Dict[str, Dict[str, str]] = {}
+        ipasn: dict[str, dict[str, str]] = {}
        if ipasn_path.exists():
            try:
                with ipasn_path.open() as f:
 
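A note on `_timeout_context` above: a `@contextlib.contextmanager` function is a generator that yields exactly once, which is why its new return annotation is `Iterator[None]`. The underlying SIGALRM mechanism, reduced to a runnable sketch (Unix-only; the timeout value is invented):

from __future__ import annotations

import contextlib
import signal
import time
from typing import Iterator

def _raise_timeout(_, __) -> None:
    raise TimeoutError

@contextlib.contextmanager
def time_limit(seconds: int) -> Iterator[None]:
    signal.signal(signal.SIGALRM, _raise_timeout)  # arm the handler
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)  # always disarm, on success or failure

try:
    with time_limit(1):
        time.sleep(2)  # stands in for an expensive tree build
except TimeoutError:
    print('gave up')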
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import fnmatch
 import logging
 
 from typing import Dict, Any, Union, List, Optional, TypedDict, Tuple
 
-from har2tree import URLNode
+from har2tree import URLNode  # type: ignore[attr-defined]
 
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
@@ -19,8 +21,8 @@ from .exceptions import MissingUUID, TreeNeedsRebuild
 class CompareSettings(TypedDict):
    '''The settings that can be passed to the compare method to filter out some differences'''
 
-    ressources_ignore_domains: Tuple[str, ...]
-    ressources_ignore_regexes: Tuple[str, ...]
+    ressources_ignore_domains: tuple[str, ...]
+    ressources_ignore_regexes: tuple[str, ...]
 
    ignore_ips: bool
 
@@ -39,16 +41,16 @@ class Comparator():
        self.public_domain = get_config('generic', 'public_domain')
 
    @property
-    def redis(self) -> Redis:
+    def redis(self) -> Redis:  # type: ignore[type-arg]
        return Redis(connection_pool=self.redis_pool)
 
-    def get_comparables_node(self, node: URLNode) -> Dict[str, str]:
+    def get_comparables_node(self, node: URLNode) -> dict[str, str]:
        to_return = {'url': node.name, 'hostname': node.hostname}
        if hasattr(node, 'ip_address'):
            to_return['ip_address'] = str(node.ip_address)
        return to_return
 
-    def _compare_nodes(self, left: Dict[str, str], right: Dict[str, str], /, different: bool, ignore_ips: bool) -> Tuple[bool, Dict[str, Any]]:
+    def _compare_nodes(self, left: dict[str, str], right: dict[str, str], /, different: bool, ignore_ips: bool) -> tuple[bool, dict[str, Any]]:
        to_return = {}
        # URL
        if left['url'] != right['url']:
@@ -78,12 +80,12 @@ class Comparator():
        # IPs in hostnode + ASNs
        return different, to_return
 
-    def get_comparables_capture(self, capture_uuid: str) -> Dict[str, Any]:
+    def get_comparables_capture(self, capture_uuid: str) -> dict[str, Any]:
        if capture_uuid not in self._captures_index:
            raise MissingUUID(f'{capture_uuid} does not exists.')
 
        capture = self._captures_index[capture_uuid]
-        to_return: Dict[str, Any]
+        to_return: dict[str, Any]
        try:
            if capture.error:
                # The error on lookyloo is too verbose and contains the UUID of the capture, skip that.
@@ -108,17 +110,17 @@ class Comparator():
            to_return = {'error': str(e)}
        return to_return
 
-    def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Tuple[bool, Dict[str, Any]]:
+    def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: CompareSettings | None=None) -> tuple[bool, dict[str, Any]]:
        if capture_left not in self._captures_index:
            raise MissingUUID(f'{capture_left} does not exists.')
        if capture_right not in self._captures_index:
            raise MissingUUID(f'{capture_right} does not exists.')
 
        different: bool = False
-        to_return: Dict[str, Dict[str, Union[str,
-                                             List[Union[str, Dict[str, Any]]],
-                                             Dict[str, Union[int, str,
-                                                             List[Union[int, str, Dict[str, Any]]]]]]]] = {}
+        to_return: dict[str, dict[str, (str |
+                                        list[str | dict[str, Any]] |
+                                        dict[str, (int | str |
+                                                   list[int | str | dict[str, Any]])])]] = {}
        to_return['lookyloo_urls'] = {'left': f'https://{self.public_domain}/tree/{capture_left}',
                                      'right': f'https://{self.public_domain}/tree/{capture_right}'}
        left = self.get_comparables_capture(capture_left)
@@ -192,7 +194,7 @@ class Comparator():
                                           'details': left['redirects']['length']}
 
        # Prepare settings
-        _settings: Optional[CompareSettings]
+        _settings: CompareSettings | None
        if settings:
            # cleanup the settings
            _ignore_domains = set(settings['ressources_ignore_domains'] if settings.get('ressources_ignore_domains') else [])
 
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import json
 import logging
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Union
 from urllib.parse import urlsplit
 
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from redis import Redis
 
 from .default import get_config, get_homedir, get_socket_path
@@ -16,14 +18,14 @@ from .modules import SaneJavaScript
 
 class Context():
 
-    def __init__(self):
+    def __init__(self) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(get_config('generic', 'loglevel'))
-        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)
+        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)  # type: ignore[type-arg]
        self._cache_known_content()
        self.sanejs = SaneJavaScript(config_name='SaneJS')
 
-    def clear_context(self):
+    def clear_context(self) -> None:
        self.redis.flushdb()
 
    def _cache_known_content(self) -> None:
@@ -55,13 +57,13 @@ class Context():
                p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
        p.execute()
 
-    def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode, str]) -> Dict[str, Any]:
+    def find_known_content(self, har2tree_container: CrawledTree | HostNode | URLNode | str) -> dict[str, Any]:
        """Return a dictionary of content resources found in the local known_content database, or in SaneJS (if enabled)"""
        if isinstance(har2tree_container, str):
-            to_lookup: Set[str] = {har2tree_container, }
+            to_lookup: set[str] = {har2tree_container, }
        else:
            to_lookup = get_resources_hashes(har2tree_container)
-        known_content_table: Dict[str, Any] = {}
+        known_content_table: dict[str, Any] = {}
        if not to_lookup:
            return known_content_table
        # get generic known content
@@ -113,7 +115,7 @@ class Context():
 
        return known_content_table
 
-    def store_known_legitimate_tree(self, tree: CrawledTree):
+    def store_known_legitimate_tree(self, tree: CrawledTree) -> None:
        known_content = self.find_known_content(tree)
        capture_file: Path = get_homedir() / 'known_content_user' / f'{urlsplit(tree.root_url).hostname}.json'
        if capture_file.exists():
@@ -156,7 +158,7 @@ class Context():
        with open(capture_file, 'w') as f:
            json.dump(to_store, f, indent=2, default=serialize_to_json)
 
-    def mark_as_legitimate(self, tree: CrawledTree, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> None:
+    def mark_as_legitimate(self, tree: CrawledTree, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> None:
        if hostnode_uuid:
            urlnodes = tree.root_hartree.get_host_node_by_uuid(hostnode_uuid).urls
        elif urlnode_uuid:
@@ -214,7 +216,7 @@ class Context():
    def legitimate_body(self, body_hash: str, legitimate_hostname: str) -> None:
        self.redis.sadd(f'bh|{body_hash}|legitimate', legitimate_hostname)
 
-    def store_known_malicious_ressource(self, ressource_hash: str, details: Dict[str, str]):
+    def store_known_malicious_ressource(self, ressource_hash: str, details: dict[str, str]) -> None:
        known_malicious_ressource_file = get_homedir() / 'known_content_user' / 'malicious.json'
        if known_malicious_ressource_file.exists():
            with open(known_malicious_ressource_file) as f:
@@ -236,7 +238,7 @@ class Context():
        with open(known_malicious_ressource_file, 'w') as f:
            json.dump(to_store, f, indent=2, default=serialize_to_json)
 
-    def add_malicious(self, ressource_hash: str, details: Dict[str, str]):
+    def add_malicious(self, ressource_hash: str, details: dict[str, str]) -> None:
        self.store_known_malicious_ressource(ressource_hash, details)
        p = self.redis.pipeline()
        p.sadd('bh|malicious', ressource_hash)
@@ -246,7 +248,7 @@ class Context():
            p.sadd(f'{ressource_hash}|tag', details['type'])
        p.execute()
 
-    def store_known_legitimate_ressource(self, ressource_hash: str, details: Dict[str, str]):
+    def store_known_legitimate_ressource(self, ressource_hash: str, details: dict[str, str]) -> None:
        known_legitimate_ressource_file = get_homedir() / 'known_content_user' / 'legitimate.json'
        if known_legitimate_ressource_file.exists():
            with open(known_legitimate_ressource_file) as f:
@@ -267,7 +269,7 @@ class Context():
        with open(known_legitimate_ressource_file, 'w') as f:
            json.dump(to_store, f, indent=2, default=serialize_to_json)
 
-    def add_legitimate(self, ressource_hash: str, details: Dict[str, str]):
+    def add_legitimate(self, ressource_hash: str, details: dict[str, str]) -> None:
        self.store_known_legitimate_ressource(ressource_hash, details)
        if 'domain' in details:
            self.redis.sadd(f'bh|{ressource_hash}|legitimate', details['domain'])
@@ -277,7 +279,7 @@ class Context():
 
    # Query DB
 
-    def is_legitimate(self, urlnode: URLNode, known_hashes: Dict[str, Any]) -> Optional[bool]:
+    def is_legitimate(self, urlnode: URLNode, known_hashes: dict[str, Any]) -> bool | None:
        """
        If legitimate if generic, marked as legitimate or known on sanejs, loaded from the right domain
        3 cases:
@@ -285,7 +287,7 @@ class Context():
        * False if *any* content is malicious
        * None in all other cases
        """
-        status: List[Optional[bool]] = []
+        status: list[bool | None] = []
        for h in urlnode.resources_hashes:
            # Note: we can have multiple hashes on the same urlnode (see embedded resources).
            if h not in known_hashes:
@@ -305,7 +307,7 @@ class Context():
            return True  # All the contents are known legitimate
        return None
 
-    def is_malicious(self, urlnode: URLNode, known_hashes: Dict[str, Any]) -> Optional[bool]:
+    def is_malicious(self, urlnode: URLNode, known_hashes: dict[str, Any]) -> bool | None:
        """3 cases:
        * True if *any* content is malicious
        * False if *all* the contents are known legitimate
 
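Several `Context` methods above persist sets with `json.dump(..., default=serialize_to_json)`. The `default=` hook is only called for objects the encoder cannot serialize natively, which is how Python sets end up as sorted JSON lists on disk. Standalone sketch (data invented):

import json

def serialize_to_json(obj):
    # same shape as the lookyloo helper: sets become sorted lists
    if isinstance(obj, set):
        return sorted(obj)
    raise TypeError(f'{type(obj)} is not JSON serializable')

doc = {'hostnames': {'b.example.com', 'a.example.com'}}
print(json.dumps(doc, default=serialize_to_json))
# {"hostnames": ["a.example.com", "b.example.com"]}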
@@ -16,3 +16,17 @@ from .exceptions import MissingEnv, CreateDirectoryException, ConfigError  # noq
 from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file  # noqa
 
 os.chdir(get_homedir())
+
+__all__ = [
+    'LookylooException',
+    'AbstractManager',
+    'MissingEnv',
+    'CreateDirectoryException',
+    'ConfigError',
+    'get_homedir',
+    'load_configs',
+    'get_config',
+    'safe_create_dir',
+    'get_socket_path',
+    'try_make_file',
+]
 
@@ -1,14 +1,16 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import asyncio
 import logging
 import logging.config
 import os
 import signal
 import time
 from abc import ABC
 from datetime import datetime, timedelta
 from subprocess import Popen
 from typing import List, Optional, Tuple
 
 from redis import Redis
 from redis.exceptions import ConnectionError as RedisConnectionError
@@ -20,18 +22,18 @@ class AbstractManager(ABC):
 
    script_name: str
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
        self.loglevel: int = loglevel if loglevel is not None else get_config('generic', 'loglevel') or logging.INFO
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(self.loglevel)
        self.logger.info(f'Initializing {self.__class__.__name__}')
-        self.process: Optional[Popen] = None
+        self.process: Popen | None = None  # type: ignore[type-arg]
        self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
 
        self.force_stop = False
 
    @staticmethod
-    def is_running() -> List[Tuple[str, float]]:
+    def is_running() -> list[tuple[str, float]]:
        try:
            r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
            for script_name, score in r.zrangebyscore('running', '-inf', '+inf', withscores=True):
@@ -52,7 +54,7 @@ class AbstractManager(ABC):
            return []
 
    @staticmethod
-    def clear_running():
+    def clear_running() -> None:
        try:
            r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
            r.delete('running')
@@ -60,14 +62,14 @@ class AbstractManager(ABC):
            print('Unable to connect to redis, the system is down.')
 
    @staticmethod
-    def force_shutdown():
+    def force_shutdown() -> None:
        try:
            r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
            r.set('shutdown', 1)
        except RedisConnectionError:
            print('Unable to connect to redis, the system is down.')
 
-    def set_running(self, number: Optional[int]=None) -> None:
+    def set_running(self, number: int | None=None) -> None:
        if number == 0:
            self.__redis.zrem('running', self.script_name)
        else:
@@ -111,7 +113,7 @@ class AbstractManager(ABC):
    def _to_run_forever(self) -> None:
        raise NotImplementedError('This method must be implemented by the child')
 
-    def _kill_process(self):
+    def _kill_process(self) -> None:
        if self.process is None:
            return
        kill_order = [signal.SIGWINCH, signal.SIGTERM, signal.SIGINT, signal.SIGKILL]
@@ -167,7 +169,7 @@ class AbstractManager(ABC):
    def _wait_to_finish(self) -> None:
        self.logger.info('Not implemented, nothing to wait for.')
 
-    async def stop(self):
+    async def stop(self) -> None:
        self.force_stop = True
 
    async def _to_run_forever_async(self) -> None:
@@ -176,7 +178,7 @@ class AbstractManager(ABC):
    async def _wait_to_finish_async(self) -> None:
        self.logger.info('Not implemented, nothing to wait for.')
 
-    async def stop_async(self):
+    async def stop_async(self) -> None:
        """Method to pass the signal handler:
            loop.add_signal_handler(signal.SIGTERM, lambda: loop.create_task(p.stop()))
        """
 
@@ -1,4 +1,7 @@
 #!/usr/bin/env python3
+
+from __future__ import annotations
+
 import json
 import logging
 import os
@@ -9,7 +12,7 @@ from typing import Any, Dict, Optional, Union
 from . import env_global_name
 from .exceptions import ConfigError, CreateDirectoryException, MissingEnv
 
-configs: Dict[str, Dict[str, Any]] = {}
+configs: dict[str, dict[str, Any]] = {}
 logger = logging.getLogger('Helpers')
 
 
@@ -34,7 +37,7 @@ Run the following command (assuming you run the code from the clonned repository
 
 
 @lru_cache(64)
-def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
+def load_configs(path_to_config_files: str | Path | None=None) -> None:
    global configs
    if configs:
        return
@@ -57,7 +60,7 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
 
 
 @lru_cache(64)
-def get_config(config_type: str, entry: Optional[str]=None, quiet: bool=False) -> Any:
+def get_config(config_type: str, entry: str | None=None, quiet: bool=False) -> Any:
    """Get an entry from the given config_type file. Automatic fallback to the sample file"""
    global configs
    if not configs:
@@ -97,7 +100,7 @@ def get_socket_path(name: str) -> str:
    return str(get_homedir() / mapping[name])
 
 
-def try_make_file(filename: Path):
+def try_make_file(filename: Path) -> bool:
    try:
        filename.touch(exist_ok=False)
        return True
 
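`load_configs` and `get_config` above are wrapped in `@lru_cache(64)`, so repeated lookups are served from memory instead of re-reading JSON from disk. This is also why an earlier hunk imports the private `functools._CacheInfo` as `CacheInfo`: it is the named tuple returned by `cache_info()`, used to annotate `lru_cache_status`. A small sketch (the setting name is invented):

from functools import _CacheInfo as CacheInfo  # private alias, as in the capturecache hunk
from functools import lru_cache

@lru_cache(64)
def get_setting(name: str) -> str:
    print(f'(expensive read of {name!r})')
    return f'value of {name}'

get_setting('loglevel')
get_setting('loglevel')  # second call is a cache hit; no second read
info: CacheInfo = get_setting.cache_info()
print(info)  # CacheInfo(hits=1, misses=1, maxsize=64, currsize=1)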
@@ -14,23 +14,22 @@ from typing import Any, Dict, List, Optional, Set, Union, Tuple
 from urllib.parse import urlparse
 
 
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from playwrightcapture import get_devices
 from publicsuffixlist import PublicSuffixList  # type: ignore
-from pytaxonomies import Taxonomies
+from pytaxonomies import Taxonomies  # type: ignore[attr-defined]
 from ua_parser import user_agent_parser  # type: ignore
 from werkzeug.user_agent import UserAgent
 from werkzeug.utils import cached_property
 
-from .default import get_homedir, safe_create_dir, get_config
-from .exceptions import LookylooException
+from .default import get_homedir, safe_create_dir, get_config, LookylooException
 
 logger = logging.getLogger('Lookyloo - Helpers')
 
 
 # This method is used in json.dump or json.dumps calls as the default parameter:
 # json.dumps(..., default=dump_to_json)
-def serialize_to_json(obj: Union[Set]) -> Union[List]:
+def serialize_to_json(obj: Union[Set[Any]]) -> Union[List[Any]]:
    if isinstance(obj, set):
        return sorted(obj)
 
@@ -52,12 +51,12 @@ def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNod
 
 
 @lru_cache(64)
-def get_taxonomies():
+def get_taxonomies() -> Taxonomies:
    return Taxonomies()
 
 
 @lru_cache(64)
-def get_public_suffix_list():
+def get_public_suffix_list() -> PublicSuffixList:
    """Initialize Public Suffix List"""
    # TODO (?): fetch the list
    return PublicSuffixList()
@@ -131,7 +130,7 @@ def get_sorted_captures_from_disk(captures_dir: Path, /, *,
 
 class UserAgents:
 
-    def __init__(self):
+    def __init__(self) -> None:
        if get_config('generic', 'use_user_agents_users'):
            self.path = get_homedir() / 'own_user_agents'
        else:
@@ -145,14 +144,14 @@ class UserAgents:
        self.playwright_devices = get_devices()
        self._load_newest_ua_file(ua_files_path[0])
 
-    def _load_newest_ua_file(self, path: Path):
+    def _load_newest_ua_file(self, path: Path) -> None:
        self.most_recent_ua_path = path
        with self.most_recent_ua_path.open() as f:
            self.most_recent_uas = json.load(f)
        self.by_freq = self.most_recent_uas.pop('by_frequency')
        self._load_playwright_devices()
 
-    def _load_playwright_devices(self):
+    def _load_playwright_devices(self) -> None:
        # Only get default and desktop for now.
        for device_name, details in self.playwright_devices['desktop']['default'].items():
            parsed_ua = ParsedUserAgent(details['user_agent'])
@@ -254,16 +253,16 @@ def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes, L
    return to_return
 
 
-def uniq_domains(uniq_urls):
+def uniq_domains(uniq_urls: List[str]) -> Set[str]:
    domains = set()
    for url in uniq_urls:
        splitted = urlparse(url)
        if splitted.hostname:
            domains.add(splitted.hostname)
    return domains
 
 
 @lru_cache(64)
-def get_useragent_for_requests():
+def get_useragent_for_requests() -> str:
    return f'Lookyloo / {version("lookyloo")}'
 
 
@@ -331,11 +331,11 @@ class ParsedUserAgent(UserAgent):
    # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/
 
    @cached_property
-    def _details(self):
+    def _details(self) -> Dict[str, Any]:
        return user_agent_parser.Parse(self.string)
 
    @property
-    def platform(self):
+    def platform(self) -> Optional[str]:  # type: ignore[override]
        return self._details['os'].get('family')
 
    @property
@@ -343,11 +343,11 @@ class ParsedUserAgent(UserAgent):
        return self._aggregate_version(self._details['os'])
 
    @property
-    def browser(self):
+    def browser(self) -> Optional[str]:  # type: ignore[override]
        return self._details['user_agent'].get('family')
 
    @property
-    def version(self):
+    def version(self) -> Optional[str]:  # type: ignore[override]
        return self._aggregate_version(self._details['user_agent'])
 
    def _aggregate_version(self, details: Dict[str, str]) -> Optional[str]:
@@ -357,5 +357,5 @@ class ParsedUserAgent(UserAgent):
        if (part := details.get(key)) is not None
    )
 
-    def __str__(self):
+    def __str__(self) -> str:
        return f'OS: {self.platform} - Browser: {self.browser} {self.version} - UA: {self.string}'
 
@ -1,5 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
# import re
|
||||
|
@ -7,7 +9,7 @@ from collections import defaultdict
|
|||
from typing import Dict, Iterable, List, Optional, Set, Tuple
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from har2tree import CrawledTree
|
||||
from har2tree import CrawledTree # type: ignore[attr-defined]
|
||||
from redis import ConnectionPool, Redis
|
||||
from redis.connection import UnixDomainSocketConnection
|
||||
|
||||
|
@ -23,11 +25,11 @@ class Indexing():
|
|||
self.redis_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection,
|
||||
path=get_socket_path('indexing'), decode_responses=True)
|
||||
|
||||
def clear_indexes(self):
|
||||
def clear_indexes(self) -> None:
|
||||
self.redis.flushdb()
|
||||
|
||||
@property
|
||||
def redis(self):
|
||||
def redis(self) -> Redis: # type: ignore[type-arg]
|
||||
return Redis(connection_pool=self.redis_pool)
|
||||
|
||||
def new_internal_uuids(self, crawled_tree: CrawledTree) -> None:
|
||||
|
@ -45,25 +47,25 @@ class Indexing():
|
|||
# ###### Cookies ######
|
||||
|
||||
@property
|
||||
def cookies_names(self) -> List[Tuple[str, float]]:
|
||||
def cookies_names(self) -> list[tuple[str, float]]:
|
||||
return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)
|
||||
|
||||
def cookies_names_number_domains(self, cookie_name: str) -> int:
|
||||
return self.redis.zcard(f'cn|{cookie_name}')
|
||||
|
||||
def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
|
||||
def cookies_names_domains_values(self, cookie_name: str, domain: str) -> list[tuple[str, float]]:
|
||||
return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)
|
||||
|
||||
def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
|
||||
def get_cookie_domains(self, cookie_name: str) -> list[tuple[str, float]]:
|
||||
return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)
|
||||
|
||||
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
|
||||
def get_cookies_names_captures(self, cookie_name: str) -> list[tuple[str, str]]:
|
||||
return [uuids.split('|') for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
|
||||
|
||||
def _reindex_cookies_capture(self, crawled_tree: CrawledTree) -> None:
|
||||
pipeline = self.redis.pipeline()
|
||||
already_loaded: Set[Tuple[str, str]] = set()
|
||||
already_cleaned_up: Set[str] = set()
|
||||
already_loaded: set[tuple[str, str]] = set()
|
||||
already_cleaned_up: set[str] = set()
|
||||
for urlnode in crawled_tree.root_hartree.url_tree.traverse():
|
||||
if 'cookies_received' not in urlnode.features:
|
||||
continue
|
||||
|
@ -90,7 +92,7 @@ class Indexing():
|
|||
self.redis.sadd('indexed_cookies', crawled_tree.uuid)
|
||||
|
||||
pipeline = self.redis.pipeline()
|
||||
already_loaded: Set[Tuple[str, str]] = set()
|
||||
already_loaded: set[tuple[str, str]] = set()
|
||||
for urlnode in crawled_tree.root_hartree.url_tree.traverse():
|
||||
if 'cookies_received' not in urlnode.features:
|
||||
continue
|
||||
|
@@ -131,13 +133,13 @@ class Indexing():
     # ###### Body hashes ######

     @property
-    def ressources(self) -> List[Tuple[str, float]]:
+    def ressources(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('body_hashes', 0, 200, withscores=True)

     def ressources_number_domains(self, h: str) -> int:
         return self.redis.zcard(f'bh|{h}')

-    def body_hash_fequency(self, body_hash: str) -> Dict[str, int]:
+    def body_hash_fequency(self, body_hash: str) -> dict[str, int]:
         pipeline = self.redis.pipeline()
         pipeline.zscore('body_hashes', body_hash)
         pipeline.zcard(f'bh|{body_hash}')

@@ -151,7 +153,7 @@ class Indexing():

     def _reindex_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
         # if the capture is regenerated, the hostnodes/urlnodes UUIDs are changed
-        cleaned_up_hashes: Set[str] = set()
+        cleaned_up_hashes: set[str] = set()
         pipeline = self.redis.pipeline()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             for h in urlnode.resources_hashes:

@@ -181,17 +183,17 @@ class Indexing():
                              f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
         pipeline.execute()

-    def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
+    def get_hash_uuids(self, body_hash: str) -> tuple[str, str, str]:
         """Use that to get a reference allowing to fetch a resource from one of the capture."""
-        capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')
+        capture_uuid = str(self.redis.srandmember(f'bh|{body_hash}|captures'))
         entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
         urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
         return capture_uuid, urlnode_uuid, hostnode_uuid

-    def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
-                               filter_capture_uuid: Optional[str]=None,
+    def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None,
+                               filter_capture_uuid: str | None=None,
                                limit: int=20,
-                               prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
+                               prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool]]]:
        '''Get the captures matching the hash.

        :param filter_url: URL of the hash we're searching for
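The one behavioural tweak above is in get_hash_uuids: redis-py annotates srandmember loosely (its return type depends on whether a count is passed and on response decoding), so the value is now coerced with str() to give mypy a concrete type. With decode_responses=True, as on this connection pool, the member is already a str and the coercion is a no-op at runtime. A sketch of the idea, assuming a decoding client and a placeholder key:

    from redis import Redis

    r = Redis(decode_responses=True)
    member = r.srandmember('bh|<body_hash>|captures')  # typed broadly by redis-py
    capture_uuid = str(member)  # narrows the type for mypy; value unchanged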
@@ -199,7 +201,7 @@ class Indexing():
        :param limit: Max matching captures to return, -1 means unlimited.
        :param prefered_uuids: UUID cached right now, so we don't rebuild trees.
        '''
-        to_return: List[Tuple[str, str, str, bool]] = []
+        to_return: list[tuple[str, str, str, bool]] = []
         len_captures = self.redis.scard(f'bh|{body_hash}|captures')
         unlimited = False
         if limit == -1:

@@ -224,11 +226,11 @@ class Indexing():
                 break
         return len_captures, to_return

-    def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]:
+    def get_body_hash_domains(self, body_hash: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'bh|{body_hash}', 0, -1, withscores=True)

-    def get_body_hash_urls(self, body_hash: str) -> Dict[str, List[Dict[str, str]]]:
-        all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
+    def get_body_hash_urls(self, body_hash: str) -> dict[str, list[dict[str, str]]]:
+        all_captures: set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
         urls = defaultdict(list)
         for capture_uuid in list(all_captures):
             for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
@@ -239,19 +241,19 @@ class Indexing():
     # ###### HTTP Headers Hashes ######

     @property
-    def http_headers_hashes(self) -> List[Tuple[str, float]]:
+    def http_headers_hashes(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('hhhashes', 0, -1, withscores=True)

     def http_headers_hashes_number_captures(self, hhh: str) -> int:
         return self.redis.scard(f'hhhashes|{hhh}|captures')

-    def get_http_headers_hashes_captures(self, hhh: str) -> List[Tuple[str, str]]:
+    def get_http_headers_hashes_captures(self, hhh: str) -> list[tuple[str, str]]:
         return [uuids.split('|') for uuids in self.redis.smembers(f'hhhashes|{hhh}|captures')]

     def _reindex_http_headers_hashes_capture(self, crawled_tree: CrawledTree) -> None:
         pipeline = self.redis.pipeline()
-        already_loaded: Set[str] = set()
-        already_cleaned_up: Set[str] = set()
+        already_loaded: set[str] = set()
+        already_cleaned_up: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'hhhash' not in urlnode.features:
                 continue

@@ -276,7 +278,7 @@ class Indexing():
         self.redis.sadd('indexed_hhhashes', crawled_tree.uuid)

         pipeline = self.redis.pipeline()
-        already_loaded: Set[str] = set()
+        already_loaded: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'hhhash' not in urlnode.features:
                 continue

@@ -291,11 +293,11 @@ class Indexing():
     # ###### URLs and Domains ######

     @property
-    def urls(self) -> List[Tuple[str, float]]:
+    def urls(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('urls', 0, 200, withscores=True)

     @property
-    def hostnames(self) -> List[Tuple[str, float]]:
+    def hostnames(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('hostnames', 0, 200, withscores=True)

     def index_url_capture(self, crawled_tree: CrawledTree) -> None:
@@ -316,21 +318,21 @@ class Indexing():
         pipeline.sadd(f'urls|{md5}|captures', crawled_tree.uuid)
         pipeline.execute()

-    def get_captures_url(self, url: str) -> Set[str]:
+    def get_captures_url(self, url: str) -> set[str]:
         md5 = hashlib.md5(url.encode()).hexdigest()
         return self.redis.smembers(f'urls|{md5}|captures')

-    def get_captures_hostname(self, hostname: str) -> Set[str]:
+    def get_captures_hostname(self, hostname: str) -> set[str]:
         return self.redis.smembers(f'hostnames|{hostname}|captures')

     # ###### Categories ######

     @property
-    def categories(self) -> List[Tuple[str, int]]:
+    def categories(self) -> list[tuple[str, int]]:
         return [(c, int(score))
                 for c, score in self.redis.zrevrange('categories', 0, 200, withscores=True)]

-    def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]):
+    def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]) -> None:
         if not categories:
             return
         if self.redis.sismember('indexed_categories', capture_uuid):

@@ -345,5 +347,5 @@ class Indexing():
         pipeline.sadd(category, capture_uuid)
         pipeline.execute()

-    def get_captures_category(self, category: str) -> Set[str]:
+    def get_captures_category(self, category: str) -> set[str]:
         return self.redis.smembers(category)
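A side note on the URL index used by get_captures_url above: URLs are keyed by the MD5 of the URL rather than the raw URL, which keeps key sizes bounded and avoids clashes with the `|` separator used elsewhere in the schema. The lookup path, condensed into a self-contained sketch (URL invented):

    import hashlib
    from redis import Redis

    r = Redis(decode_responses=True)
    url = 'https://www.example.com/?q=a|b'  # a raw '|' would break a composite key
    md5 = hashlib.md5(url.encode()).hexdigest()
    capture_uuids = r.smembers(f'urls|{md5}|captures')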
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import base64
 import copy
 import gzip

@@ -22,7 +24,7 @@ from uuid import uuid4
 from zipfile import ZipFile

 from defang import defang # type: ignore
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode # type: ignore[attr-defined]
 from lacuscore import (LacusCore,
                        CaptureStatus as CaptureStatusCore,
                        # CaptureResponse as CaptureResponseCore)

@@ -30,15 +32,15 @@ from lacuscore import (LacusCore,
                        CaptureSettings as CaptureSettingsCore)
 from PIL import Image, UnidentifiedImageError
 from playwrightcapture import get_devices
-from pylacus import (PyLacus,
+from pylacus import (PyLacus, # type: ignore[attr-defined]
                      CaptureStatus as CaptureStatusPy
                      # CaptureResponse as CaptureResponsePy,
                      # CaptureResponseJson as CaptureResponseJsonPy,
                      # CaptureSettings as CaptureSettingsPy
                      )
-from pymisp import MISPAttribute, MISPEvent, MISPObject
-from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable
-from pylookyloomonitoring import PyLookylooMonitoring
+from pymisp import MISPAttribute, MISPEvent, MISPObject # type: ignore[attr-defined]
+from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable # type: ignore[attr-defined]
+from pylookyloomonitoring import PyLookylooMonitoring # type: ignore[attr-defined]
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
@@ -62,13 +64,13 @@ if TYPE_CHECKING:

 class CaptureSettings(CaptureSettingsCore, total=False):
     '''The capture settings that can be passed to Lookyloo'''
-    listing: Optional[int]
-    not_queued: Optional[int]
-    auto_report: Optional[Union[bool, str, Dict[str, str]]]
-    dnt: Optional[str]
-    browser_name: Optional[str]
-    os: Optional[str]
-    parent: Optional[str]
+    listing: int | None
+    not_queued: int | None
+    auto_report: bool | str | dict[str, str] | None
+    dnt: str | None
+    browser_name: str | None
+    os: str | None
+    parent: str | None


 class Lookyloo():
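The CaptureSettings rewrite is purely syntactic: `Optional[Union[bool, str, Dict[str, str]]]` and `bool | str | dict[str, str] | None` spell exactly the same type, and inside a `total=False` TypedDict every key stays optional either way. A compact sketch of the same construct (field names borrowed from the class above):

    from __future__ import annotations
    from typing import TypedDict

    class Settings(TypedDict, total=False):
        auto_report: bool | str | dict[str, str] | None  # == Optional[Union[...]]
        listing: int | None

With the future import in place, these annotation strings are never evaluated at class-creation time, so the class also works on Python 3.8.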
@@ -153,13 +155,13 @@ class Lookyloo():
         self.lacus

     @property
-    def redis(self):
+    def redis(self) -> Redis: # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)

     @cached_property
-    def lacus(self):
+    def lacus(self) -> PyLacus | LacusCore:
         has_remote_lacus = False
-        self._lacus: Union[PyLacus, LacusCore]
+        self._lacus: PyLacus | LacusCore
         if get_config('generic', 'remote_lacus'):
             remote_lacus_config = get_config('generic', 'remote_lacus')
             if remote_lacus_config.get('enable'):

@@ -180,7 +182,7 @@ class Lookyloo():

         if not has_remote_lacus:
             # We need a redis connector that doesn't decode.
-            redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
+            redis: Redis = Redis(unix_socket_path=get_socket_path('cache')) # type: ignore[type-arg]
             self._lacus = LacusCore(redis, tor_proxy=get_config('generic', 'tor_proxy'),
                                     max_capture_time=get_config('generic', 'max_capture_time'),
                                     only_global_lookups=get_config('generic', 'only_global_lookups'),
@@ -188,14 +190,14 @@ class Lookyloo():
         return self._lacus

     def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str,
-                    legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
+                    legitimate: bool, malicious: bool, details: dict[str, dict[str, str]]) -> None:
         '''Adds context information to a capture or a URL node'''
         if malicious:
             self.context.add_malicious(ressource_hash, details['malicious'])
         if legitimate:
             self.context.add_legitimate(ressource_hash, details['legitimate'])

-    def add_to_legitimate(self, capture_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None):
+    def add_to_legitimate(self, capture_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> None:
         '''Mark a full capture as legitimate.
         Iterates over all the nodes and mark them all as legitimate too.'''
         ct = self.get_crawled_tree(capture_uuid)

@@ -225,12 +227,12 @@ class Lookyloo():
         ct = self.get_crawled_tree(capture_uuid)
         return ct.root_hartree.get_host_node_by_uuid(node_uuid)

-    def get_statistics(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def get_statistics(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get the statistics of a capture.'''
         ct = self.get_crawled_tree(capture_uuid)
         return ct.root_hartree.stats

-    def get_info(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def get_info(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get basic information about the capture.'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
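Many of these signatures use a bare `/`, which the annotation change leaves untouched: everything before it is positional-only, so callers cannot pass capture_uuid by keyword. A self-contained two-line illustration (placeholder UUID):

    def get_info(capture_uuid: str, /) -> dict:  # same shape as the methods above
        return {}

    get_info('some-uuid')               # OK: positional
    get_info(capture_uuid='some-uuid')  # TypeError: parameter is positional-only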
@@ -254,7 +256,7 @@ class Lookyloo():
         to_return['referer'] = cache.referer if cache.referer else ''
         return to_return

-    def get_meta(self, capture_uuid: str, /) -> Dict[str, str]:
+    def get_meta(self, capture_uuid: str, /) -> dict[str, str]:
         '''Get the meta informations from a capture (mostly, details about the User Agent used.)'''
         cache = self.capture_cache(capture_uuid)
         if not cache:

@@ -294,7 +296,7 @@ class Lookyloo():
                 return json.load(f)
         return {}

-    def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def categories_capture(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get all the categories related to a capture, in MISP Taxonomies format'''
         categ_file = self._captures_index[capture_uuid].capture_dir / 'categories'
         # get existing categories if possible

@@ -337,7 +339,7 @@ class Lookyloo():
         with categ_file.open('w') as f:
             f.writelines(f'{t}\n' for t in current_categories)

-    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> dict[str, Any]:
         '''Launch the 3rd party modules on a capture.
         It uses the cached result *if* the module was triggered the same day.
         The `force` flag re-triggers the module regardless of the cache.'''
@@ -350,7 +352,7 @@ class Lookyloo():
         self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
         self.hashlookup.capture_default_trigger(ct, auto_trigger=auto_trigger)

-        to_return: Dict[str, Dict] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {},
+        to_return: dict[str, dict[str, Any]] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {},
                                       'URLhaus': {}}
         if cache := self.capture_cache(capture_uuid):
             to_return['PhishingInitiative'] = self.pi.capture_default_trigger(cache, force=force, auto_trigger=auto_trigger)

@@ -363,7 +365,7 @@ class Lookyloo():
             to_return['URLhaus'] = self.urlhaus.capture_default_trigger(cache, auto_trigger=auto_trigger)
         return to_return

-    def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
+    def get_modules_responses(self, capture_uuid: str, /) -> dict[str, Any] | None:
         '''Get the responses of the modules from the cached responses on the disk'''
         cache = self.capture_cache(capture_uuid)
         if not cache:

@@ -373,7 +375,7 @@ class Lookyloo():
             self.logger.warning(f'The capture {capture_uuid} does not have a URL in the cache, it is broken.')
             return None

-        to_return: Dict[str, Any] = {}
+        to_return: dict[str, Any] = {}
         if self.vt.available:
             to_return['vt'] = {}
             if hasattr(cache, 'redirects') and cache.redirects:

@@ -416,7 +418,7 @@ class Lookyloo():
                 to_return['urlscan']['result'] = result
         return to_return

-    def get_historical_lookups(self, capture_uuid: str, /, force: bool=False) -> Dict:
+    def get_historical_lookups(self, capture_uuid: str, /, force: bool=False) -> dict[str, Any]:
         # this method is only trigered when the user wants to get more details about the capture
         # by looking at Passive DNS systems, check if there are hits in the current capture
         # in another one and things like that. The trigger_modules method is for getting

@@ -425,7 +427,7 @@ class Lookyloo():
         if not cache:
             self.logger.warning(f'Unable to get the modules responses unless the capture {capture_uuid} is cached')
             return {}
-        to_return: Dict[str, Any] = defaultdict(dict)
+        to_return: dict[str, Any] = defaultdict(dict)
         if self.riskiq.available:
             try:
                 self.riskiq.capture_default_trigger(cache)
@@ -461,7 +463,7 @@ class Lookyloo():
     def update_tree_cache_info(self, process_id: int, classname: str) -> None:
         self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status()))

-    def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]:
+    def sorted_capture_cache(self, capture_uuids: Iterable[str] | None=None, cached_captures_only: bool=True, index_cut_time: datetime | None=None) -> list[CaptureCache]:
         '''Get all the captures in the cache, sorted by timestamp (new -> old).
         By default, this method will only return the captures that are currently cached.'''
         # Make sure we do not try to load archived captures that would still be in 'lookup_dirs'

@@ -489,13 +491,13 @@ class Lookyloo():
             # Do not try to build pickles
             capture_uuids = set(capture_uuids) & self._captures_index.cached_captures

-        all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
+        all_cache: list[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
                                          if self.capture_cache(uuid)
                                          and hasattr(self._captures_index[uuid], 'timestamp')]
         all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
         return all_cache

-    def get_capture_status(self, capture_uuid: str, /) -> Union[CaptureStatusCore, CaptureStatusPy]:
+    def get_capture_status(self, capture_uuid: str, /) -> CaptureStatusCore | CaptureStatusPy:
         '''Returns the status (queued, ongoing, done, or UUID unknown)'''
         if self.redis.hexists('lookup_dirs', capture_uuid):
             return CaptureStatusCore.DONE
@@ -520,7 +522,7 @@ class Lookyloo():
             return CaptureStatusCore.ONGOING
         return lacus_status

-    def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> Optional[CaptureCache]:
+    def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> CaptureCache | None:
         """Get the cache from redis, rebuild the tree if the internal UUID changed => slow"""
         try:
             cache = self._captures_index[capture_uuid]

@@ -598,7 +600,7 @@ class Lookyloo():
         query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent']

         # NOTE: the document must be base64 encoded
-        document: Optional[Union[str, bytes]] = query.pop('document', None)
+        document: str | bytes | None = query.pop('document', None)
         if document:
             if isinstance(document, bytes):
                 query['document'] = base64.b64encode(document).decode()
@@ -631,17 +633,16 @@ class Lookyloo():
         query = self._prepare_lacus_query(query)

         priority = get_priority(source, user, authenticated)
-        query['priority'] = priority
         if priority < -100:
             # Someone is probably abusing the system with useless URLs, remove them from the index
             query['listing'] = 0
         try:
-            perma_uuid = self.lacus.enqueue(
+            perma_uuid = self.lacus.enqueue( # type: ignore[misc]
                 url=query.get('url', None),
                 document_name=query.get('document_name', None),
                 document=query.get('document', None),
                 # depth=query.get('depth', 0),
-                browser=query.get('browser', None),
+                browser=query.get('browser', None), # type: ignore[arg-type]
                 device_name=query.get('device_name', None),
                 user_agent=query.get('user_agent', None),
                 proxy=self.global_proxy if self.global_proxy else query.get('proxy', None),

@@ -659,7 +660,7 @@ class Lookyloo():
                 with_favicon=query.get('with_favicon', True),
                 # force=query.get('force', False),
                 # recapture_interval=query.get('recapture_interval', 300),
-                priority=query.get('priority', 0)
+                priority=priority
             )
         except Exception as e:
             self.logger.critical(f'Unable to enqueue capture: {e}')

@@ -670,7 +671,7 @@ class Lookyloo():
                 and self.redis.zscore('to_capture', perma_uuid) is None): # capture ongoing

             # Make the settings redis compatible
-            mapping_capture: Dict[str, Union[bytes, float, int, str]] = {}
+            mapping_capture: dict[str, bytes | float | int | str] = {}
             for key, value in query.items():
                 if isinstance(value, bool):
                     mapping_capture[key] = 1 if value else 0

@@ -681,15 +682,15 @@ class Lookyloo():
                     mapping_capture[key] = value # type: ignore

             p = self.redis.pipeline()
-            p.zadd('to_capture', {perma_uuid: query['priority']})
-            p.hset(perma_uuid, mapping=mapping_capture)
+            p.zadd('to_capture', {perma_uuid: priority})
+            p.hset(perma_uuid, mapping=mapping_capture) # type: ignore[arg-type]
             p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
             p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
             p.execute()

         return perma_uuid

-    def takedown_details(self, hostnode: HostNode) -> Dict[str, Any]:
+    def takedown_details(self, hostnode: HostNode) -> dict[str, Any]:
         if not self.uwhois.available:
             self.logger.warning('UWhois module not enabled, unable to use this method')
             raise LookylooException('UWhois module not enabled, unable to use this method')
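Two small simplifications ride along with the annotation work here: the enqueue call and the zadd score now read the locally computed priority instead of round-tripping it through the query dict (whose `query['priority'] = priority` assignment is dropped), and the mapping loop flattens booleans because Redis hashes only hold bytes, str, int, or float. The flattening pattern, isolated as a sketch (query stands for the dict built above):

    query = {'listing': True, 'url': 'https://www.example.com/'}  # illustrative
    mapping: dict[str, bytes | float | int | str] = {}
    for key, value in query.items():
        if isinstance(value, bool):
            mapping[key] = 1 if value else 0  # Redis has no boolean type
        else:
            mapping[key] = value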
@@ -740,7 +741,7 @@ class Lookyloo():
         to_return['all_emails'] = list(to_return['all_emails'])
         return to_return

-    def contacts(self, capture_uuid: str, /) -> List[Dict[str, Any]]:
+    def contacts(self, capture_uuid: str, /) -> list[dict[str, Any]]:
         capture = self.get_crawled_tree(capture_uuid)
         rendered_hostnode = self.get_hostnode_from_tree(capture_uuid, capture.root_hartree.rendered_node.hostnode_uuid)
         result = []

@@ -749,7 +750,7 @@ class Lookyloo():
             result.append(self.takedown_details(rendered_hostnode))
         return result

-    def send_mail(self, capture_uuid: str, /, email: str='', comment: Optional[str]=None) -> None:
+    def send_mail(self, capture_uuid: str, /, email: str='', comment: str | None=None) -> None:
         '''Send an email notification regarding a specific capture'''
         if not get_config('generic', 'enable_mail_notification'):
             return

@@ -856,7 +857,7 @@ class Lookyloo():
     def get_potential_favicons(self, capture_uuid: str, /, all_favicons: Literal[True], for_datauri: Literal[False]) -> BytesIO:
         ...

-    def get_potential_favicons(self, capture_uuid: str, /, all_favicons: bool=False, for_datauri: bool=False) -> Union[BytesIO, str]:
+    def get_potential_favicons(self, capture_uuid: str, /, all_favicons: bool=False, for_datauri: bool=False) -> BytesIO | str:
         '''Get rendered HTML'''
         fav = self._get_raw(capture_uuid, 'potential_favicons.ico', all_favicons)
         if not all_favicons and for_datauri:
@@ -867,7 +868,7 @@ class Lookyloo():
         '''Get rendered HTML'''
         return self._get_raw(capture_uuid, 'html', all_html)

-    def get_data(self, capture_uuid: str, /) -> Tuple[str, BytesIO]:
+    def get_data(self, capture_uuid: str, /) -> tuple[str, BytesIO]:
         '''Get the data'''
         return self._get_raw(capture_uuid, 'data.filename', False).getvalue().decode(), self._get_raw(capture_uuid, 'data', False)

@@ -879,7 +880,7 @@ class Lookyloo():
         '''Get the screenshot(s) of the rendered page'''
         return self._get_raw(capture_uuid, 'png', all_files=False)

-    def get_screenshot_thumbnail(self, capture_uuid: str, /, for_datauri: bool=False, width: int=64) -> Union[str, BytesIO]:
+    def get_screenshot_thumbnail(self, capture_uuid: str, /, for_datauri: bool=False, width: int=64) -> str | BytesIO:
         '''Get the thumbnail of the rendered page. Always crop to a square.'''
         to_return = BytesIO()
         size = width, width

@@ -921,12 +922,12 @@ class Lookyloo():
         '''Get all the files related to this capture.'''
         return self._get_raw(capture_uuid)

-    def get_urls_rendered_page(self, capture_uuid: str, /) -> List[str]:
+    def get_urls_rendered_page(self, capture_uuid: str, /) -> list[str]:
         ct = self.get_crawled_tree(capture_uuid)
         return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page)
                       - set(ct.root_hartree.all_url_requests.keys()))

-    def get_body_hash_investigator(self, body_hash: str, /) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float]]]:
+    def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float]]]:
         '''Returns all the captures related to a hash (sha512), used in the web interface.'''
         total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1)
         cached_captures = self.sorted_capture_cache([d[0] for d in details])

@@ -934,7 +935,7 @@ class Lookyloo():
         domains = self.indexing.get_body_hash_domains(body_hash)
         return captures, domains

-    def get_body_hash_full(self, body_hash: str, /) -> Tuple[Dict[str, List[Dict[str, str]]], BytesIO]:
+    def get_body_hash_full(self, body_hash: str, /) -> tuple[dict[str, list[dict[str, str]]], BytesIO]:
         '''Returns a lot of information about the hash (sha512) and the hits in the instance.
         Also contains the data (base64 encoded)'''
         details = self.indexing.get_body_hash_urls(body_hash)

@@ -969,9 +970,9 @@ class Lookyloo():
         # TODO: Couldn't find the file anywhere. Maybe return a warning in the file?
         return details, BytesIO()

-    def get_all_body_hashes(self, capture_uuid: str, /) -> Dict[str, Dict[str, Union[URLNode, int]]]:
+    def get_all_body_hashes(self, capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]:
         ct = self.get_crawled_tree(capture_uuid)
-        to_return: Dict[str, Dict[str, Union[URLNode, int]]] = defaultdict()
+        to_return: dict[str, dict[str, URLNode | int]] = defaultdict()
         for node in ct.root_hartree.url_tree.traverse():
             if node.empty_response or node.body_hash in to_return:
                 # If we have the same hash more than once, skip
@@ -981,24 +982,24 @@ class Lookyloo():
             to_return[node.body_hash] = {'node': node, 'total_captures': total_captures}
         return to_return

-    def get_latest_url_capture(self, url: str, /) -> Optional[CaptureCache]:
+    def get_latest_url_capture(self, url: str, /) -> CaptureCache | None:
         '''Get the most recent capture with this URL'''
         captures = self.sorted_capture_cache(self.indexing.get_captures_url(url))
         if captures:
             return captures[0]
         return None

-    def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> List[Dict]:
+    def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
         '''Get the most recent captures and URL nodes where the URL has been seen.'''
         captures = self.sorted_capture_cache(self.indexing.get_captures_url(url), cached_captures_only=cached_captures_only)

-        to_return: List[Dict] = []
+        to_return: list[dict[str, Any]] = []
         for capture in captures[:limit]:
             ct = self.get_crawled_tree(capture.uuid)
-            to_append: Dict[str, Union[str, Dict]] = {'capture_uuid': capture.uuid,
+            to_append: dict[str, str | dict[str, Any]] = {'capture_uuid': capture.uuid,
                                                       'start_timestamp': capture.timestamp.isoformat(),
                                                       'title': capture.title}
-            urlnodes: Dict[str, Dict[str, str]] = {}
+            urlnodes: dict[str, dict[str, str]] = {}
             for urlnode in ct.root_hartree.url_tree.search_nodes(name=url):
                 urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(),
                                           'hostnode_uuid': urlnode.hostnode_uuid}

@@ -1008,19 +1009,20 @@ class Lookyloo():
             to_return.append(to_append)
         return to_return

-    def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> List[Dict]:
+    def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
         '''Get the most recent captures and URL nodes where the hostname has been seen.'''
         captures = self.sorted_capture_cache(self.indexing.get_captures_hostname(hostname), cached_captures_only=cached_captures_only)

-        to_return: List[Dict] = []
+        to_return: list[dict[str, Any]] = []
         for capture in captures[:limit]:
             ct = self.get_crawled_tree(capture.uuid)
-            to_append: Dict[str, Union[str, List, Dict]] = {'capture_uuid': capture.uuid,
+            to_append: dict[str, str | list[Any] | dict[str, Any]] = {
+                'capture_uuid': capture.uuid,
                 'start_timestamp': capture.timestamp.isoformat(),
                 'title': capture.title}
-            hostnodes: List[str] = []
+            hostnodes: list[str] = []
             if with_urls_occurrences:
-                urlnodes: Dict[str, Dict[str, str]] = {}
+                urlnodes: dict[str, dict[str, str]] = {}
             for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname):
                 hostnodes.append(hostnode.uuid)
                 if with_urls_occurrences:
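Both occurrence helpers return the same envelope: one dict per capture carrying 'capture_uuid', 'start_timestamp', and 'title', plus the matching nodes. A hedged view of a single element of get_url_occurrences' result (field names from the code above, values illustrative):

    {'capture_uuid': '<uuid>',
     'start_timestamp': '2023-11-27T10:00:00',
     'title': '<page title>',
     'urlnodes': {'<urlnode uuid>': {'start_time': '<iso timestamp>',
                                     'hostnode_uuid': '<hostnode uuid>'}}}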
@@ -1036,7 +1038,7 @@ class Lookyloo():
             to_return.append(to_append)
         return to_return

-    def get_cookie_name_investigator(self, cookie_name: str, /) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float, List[Tuple[str, float]]]]]:
+    def get_cookie_name_investigator(self, cookie_name: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float, list[tuple[str, float]]]]]:
         '''Returns all the captures related to a cookie name entry, used in the web interface.'''
         cached_captures = self.sorted_capture_cache([entry[0] for entry in self.indexing.get_cookies_names_captures(cookie_name)])
         captures = [(cache.uuid, cache.title) for cache in cached_captures]

@@ -1044,7 +1046,7 @@ class Lookyloo():
                    for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
         return captures, domains

-    def get_hhh_investigator(self, hhh: str, /) -> Tuple[List[Tuple[str, str, str, str]], List[Tuple[str, str]]]:
+    def get_hhh_investigator(self, hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
         '''Returns all the captures related to a cookie name entry, used in the web interface.'''
         all_captures = dict(self.indexing.get_http_headers_hashes_captures(hhh))
         if cached_captures := self.sorted_capture_cache([entry for entry in all_captures]):

@@ -1063,11 +1065,11 @@ class Lookyloo():
             return captures, headers
         return [], []

-    def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Tuple[int, Dict[str, List[Tuple[str, str, str, str, str]]]]:
+    def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]:
         '''Search all the captures a specific hash was seen.
         If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
         Capture UUID avoids duplicates on the same capture'''
-        captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
+        captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
         total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
                                                                        prefered_uuids=set(self._captures_index.keys()))
         for h_capture_uuid, url_uuid, url_hostname, same_url in details:

@@ -1082,7 +1084,7 @@ class Lookyloo():
         captures_list['different_url'].sort(key=lambda y: y[3])
         return total_captures, captures_list

-    def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: Optional[str]) -> Optional[Tuple[str, BytesIO, str]]:
+    def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: str | None) -> tuple[str, BytesIO, str] | None:
         '''Get a specific resource from a URL node. If a hash s also given, we want an embeded resource'''
         try:
             url = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
@@ -1108,7 +1110,7 @@ class Lookyloo():
             return 'embedded_ressource.bin', BytesIO(blob.getvalue()), mimetype
         return None

-    def __misp_add_vt_to_URLObject(self, obj: MISPObject) -> Optional[MISPObject]:
+    def __misp_add_vt_to_URLObject(self, obj: MISPObject) -> MISPObject | None:
         urls = obj.get_attributes_by_relation('url')
         if not urls:
             return None

@@ -1124,7 +1126,7 @@ class Lookyloo():
         obj.add_reference(vt_obj, 'analysed-with')
         return vt_obj

-    def __misp_add_urlscan_to_event(self, capture_uuid: str, visibility: str) -> Optional[MISPAttribute]:
+    def __misp_add_urlscan_to_event(self, capture_uuid: str, visibility: str) -> MISPAttribute | None:
         if cache := self.capture_cache(capture_uuid):
             response = self.urlscan.url_submit(cache, visibility)
             if 'result' in response:

@@ -1134,7 +1136,7 @@ class Lookyloo():
             return attribute
         return None

-    def misp_export(self, capture_uuid: str, /, with_parent: bool=False) -> Union[List[MISPEvent], Dict[str, str]]:
+    def misp_export(self, capture_uuid: str, /, with_parent: bool=False) -> list[MISPEvent] | dict[str, str]:
         '''Export a capture in MISP format. You can POST the return of this method
         directly to a MISP instance and it will create an event.'''
         cache = self.capture_cache(capture_uuid)

@@ -1200,7 +1202,7 @@ class Lookyloo():

         return [event]

-    def get_misp_occurrences(self, capture_uuid: str, /, *, instance_name: Optional[str]=None) -> Optional[Tuple[Dict[str, Set[str]], str]]:
+    def get_misp_occurrences(self, capture_uuid: str, /, *, instance_name: str | None=None) -> tuple[dict[str, set[str]], str] | None:
         if instance_name is None:
             misp = self.misps.default_misp
         elif self.misps.get(instance_name) is not None:

@@ -1217,7 +1219,7 @@ class Lookyloo():
             self.logger.warning(f'Unable to get the modules responses unless the tree ({capture_uuid}) is cached.')
             return None
         nodes_to_lookup = ct.root_hartree.rendered_node.get_ancestors() + [ct.root_hartree.rendered_node]
-        to_return: Dict[str, Set[str]] = defaultdict(set)
+        to_return: dict[str, set[str]] = defaultdict(set)
         for node in nodes_to_lookup:
             hits = misp.lookup(node, ct.root_hartree.get_host_node_by_uuid(node.hostnode_uuid))
             for event_id, values in hits.items():

@@ -1226,7 +1228,7 @@ class Lookyloo():
                 to_return[event_id].update(values)
         return to_return, misp.client.root_url

-    def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List[URLNode]]]:
+    def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> dict[str, set[str]] | dict[str, list[URLNode]]:
         """Build (on demand) hashes for all the ressources of the tree, using the alorighm provided by the user.
         If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes an they're build
         with the tree. This method is computing the hashes when you query it, so it is slower."""

@@ -1236,7 +1238,7 @@ class Lookyloo():
             return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
         return hashes

-    def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:
+    def merge_hashlookup_tree(self, tree_uuid: str, /) -> tuple[dict[str, dict[str, Any]], int]:
         if not self.hashlookup.available:
             raise LookylooException('Hashlookup module not enabled.')
         hashes_tree = self.get_hashes_with_context(tree_uuid, algorithm='sha1')
@@ -1253,20 +1255,20 @@ class Lookyloo():
         with hashlookup_file.open() as f:
             hashlookup_entries = json.load(f)

-        to_return: Dict[str, Dict[str, Any]] = defaultdict(dict)
+        to_return: dict[str, dict[str, Any]] = defaultdict(dict)

         for sha1 in hashlookup_entries.keys():
             to_return[sha1]['nodes'] = hashes_tree[sha1]
             to_return[sha1]['hashlookup'] = hashlookup_entries[sha1]
         return to_return, len(hashes_tree)

-    def get_hashes(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
+    def get_hashes(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
         """Return hashes (sha512) of resources.
         Only tree_uuid: All the hashes
         tree_uuid and hostnode_uuid: hashes of all the resources in that hostnode (including embedded ressources)
         tree_uuid, hostnode_uuid, and urlnode_uuid: hash of the URL node body, and embedded resources
         """
-        container: Union[CrawledTree, HostNode, URLNode]
+        container: CrawledTree | HostNode | URLNode
         if urlnode_uuid:
             container = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
         elif hostnode_uuid:

@@ -1275,7 +1277,7 @@ class Lookyloo():
             container = self.get_crawled_tree(tree_uuid)
         return get_resources_hashes(container)

-    def get_hostnames(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
+    def get_hostnames(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
         """Return all the unique hostnames:
         * of a complete tree if no hostnode_uuid and urlnode_uuid are given
         * of a HostNode if hostnode_uuid is given

@@ -1291,7 +1293,7 @@ class Lookyloo():
         ct = self.get_crawled_tree(tree_uuid)
         return {node.name for node in ct.root_hartree.hostname_tree.traverse()}

-    def get_urls(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
+    def get_urls(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
         """Return all the unique URLs:
         * of a complete tree if no hostnode_uuid and urlnode_uuid are given
         * of a HostNode if hostnode_uuid is given

@@ -1307,18 +1309,18 @@ class Lookyloo():
         ct = self.get_crawled_tree(tree_uuid)
         return {node.name for node in ct.root_hartree.url_tree.traverse()}

-    def get_playwright_devices(self) -> Dict:
+    def get_playwright_devices(self) -> dict[str, Any]:
         """Get the preconfigured devices from Playwright"""
         return get_devices()

-    def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
+    def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> tuple[HostNode, list[dict[str, Any]]]:
         '''Gather all the informations needed to display the Hostnode investigator popup.'''

-        def normalize_known_content(h: str, /, known_content: Dict[str, Any], url: URLNode) -> Tuple[Optional[Union[str, List[Any]]], Optional[Tuple[bool, Any]]]:
+        def normalize_known_content(h: str, /, known_content: dict[str, Any], url: URLNode) -> tuple[str | list[Any] | None, tuple[bool, Any] | None]:
             ''' There are a few different sources to figure out known vs. legitimate content,
             this method normalize it for the web interface.'''
-            known: Optional[Union[str, List[Any]]] = None
-            legitimate: Optional[Tuple[bool, Any]] = None
+            known: str | list[Any] | None = None
+            legitimate: tuple[bool, Any] | None = None
             if h not in known_content:
                 return known, legitimate
@@ -1340,13 +1342,13 @@ class Lookyloo():
         known_content = self.context.find_known_content(hostnode)
         self.uwhois.query_whois_hostnode(hostnode)

-        urls: List[Dict[str, Any]] = []
+        urls: list[dict[str, Any]] = []
         for url in hostnode.urls:
             # For the popup, we need:
             # * https vs http
             # * everything after the domain
             # * the full URL
-            to_append: Dict[str, Any] = {
+            to_append: dict[str, Any] = {
                 'encrypted': url.name.startswith('https'),
                 'url_path': url.name.split('/', 3)[-1],
                 'url_object': url,

@@ -1389,7 +1391,7 @@ class Lookyloo():

             # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
             if hasattr(url, 'cookies_sent'):
-                to_display_sent: Dict[str, Set[Iterable[Optional[str]]]] = defaultdict(set)
+                to_display_sent: dict[str, set[Iterable[str | None]]] = defaultdict(set)
                 for cookie, contexts in url.cookies_sent.items():
                     if not contexts:
                         # Locally created?

@@ -1401,7 +1403,7 @@ class Lookyloo():

             # Optional: Cookies received from server in response -> map to nodes who send the cookie in request
             if hasattr(url, 'cookies_received'):
-                to_display_received: Dict[str, Dict[str, Set[Iterable[Optional[str]]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)}
+                to_display_received: dict[str, dict[str, set[Iterable[str | None]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)}
                 for domain, c_received, is_3rd_party in url.cookies_received:
                     if c_received not in ct.root_hartree.cookies_sent:
                         # This cookie is never sent.

@@ -1421,14 +1423,14 @@ class Lookyloo():
             urls.append(to_append)
         return hostnode, urls

-    def get_stats(self) -> Dict[str, List]:
+    def get_stats(self) -> dict[str, list[Any]]:
         '''Gather statistics about the lookyloo instance'''
         today = date.today()
         calendar_week = today.isocalendar()[1]

         stats_dict = {'submissions': 0, 'redirects': 0}
-        stats: Dict[int, Dict[int, Dict[str, Any]]] = {}
-        weeks_stats: Dict[int, Dict] = {}
+        stats: dict[int, dict[int, dict[str, Any]]] = {}
+        weeks_stats: dict[int, dict[str, Any]] = {}

         # Only recent captures that are not archived
         for cache in self.sorted_capture_cache():

@@ -1467,7 +1469,7 @@ class Lookyloo():
                 stats[capture_ts.year][capture_ts.month] = {'submissions': 0}
             stats[capture_ts.year][capture_ts.month]['submissions'] += 1

-        statistics: Dict[str, List] = {'weeks': [], 'years': []}
+        statistics: dict[str, list[Any]] = {'weeks': [], 'years': []}
         for week_number in sorted(weeks_stats.keys()):
             week_stat = weeks_stats[week_number]
             urls = week_stat.pop('uniq_urls')
@@ -1477,7 +1479,7 @@ class Lookyloo():
             statistics['weeks'].append(week_stat)

         for year in sorted(stats.keys()):
-            year_stats: Dict[str, Union[int, List]] = {'year': year, 'months': [], 'yearly_submissions': 0}
+            year_stats: dict[str, int | list[Any]] = {'year': year, 'months': [], 'yearly_submissions': 0}
             for month in sorted(stats[year].keys()):
                 month_stats = stats[year][month]
                 if len(month_stats) == 1:

@@ -1496,15 +1498,15 @@ class Lookyloo():
         return statistics

     def store_capture(self, uuid: str, is_public: bool,
-                      os: Optional[str]=None, browser: Optional[str]=None,
-                      parent: Optional[str]=None,
-                      downloaded_filename: Optional[str]=None, downloaded_file: Optional[bytes]=None,
-                      error: Optional[str]=None, har: Optional[Dict[str, Any]]=None,
-                      png: Optional[bytes]=None, html: Optional[str]=None,
-                      last_redirected_url: Optional[str]=None,
-                      cookies: Optional[Union[List['Cookie'], List[Dict[str, str]]]]=None,
-                      capture_settings: Optional[CaptureSettings]=None,
-                      potential_favicons: Optional[Set[bytes]]=None
+                      os: str | None=None, browser: str | None=None,
+                      parent: str | None=None,
+                      downloaded_filename: str | None=None, downloaded_file: bytes | None=None,
+                      error: str | None=None, har: dict[str, Any] | None=None,
+                      png: bytes | None=None, html: str | None=None,
+                      last_redirected_url: str | None=None,
+                      cookies: list[Cookie] | list[dict[str, str]] | None=None,
+                      capture_settings: CaptureSettings | None=None,
+                      potential_favicons: set[bytes] | None=None
                       ) -> None:

         now = datetime.now()

@@ -1512,7 +1514,7 @@ class Lookyloo():
         safe_create_dir(dirpath)

         if os or browser:
-            meta: Dict[str, str] = {}
+            meta: dict[str, str] = {}
             if os:
                 meta['os'] = os
             if browser:
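For context on the get_stats hunks above: captures are bucketed by ISO calendar week, and `today.isocalendar()[1]` is the ISO week number (week 1 is the week containing the year's first Thursday). A quick check:

    from datetime import date

    date(2024, 1, 4).isocalendar()[1]    # 1 -- January 4 is always in ISO week 1
    date(2024, 12, 30).isocalendar()[1]  # 1 -- this Monday already belongs to ISO year 2025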
@@ -14,3 +14,22 @@ from .riskiq import RiskIQ, RiskIQError # noqa
 from .urlhaus import URLhaus # noqa
 from .cloudflare import Cloudflare # noqa
 from .circlpdns import CIRCLPDNS # noqa
+
+__all__ = [
+    'FOX',
+    'MISPs',
+    'MISP',
+    'PhishingInitiative',
+    'SaneJavaScript',
+    'UrlScan',
+    'UniversalWhois',
+    'VirusTotal',
+    'Pandora',
+    'Phishtank',
+    'Hashlookup',
+    'RiskIQ',
+    'RiskIQError',
+    'URLhaus',
+    'Cloudflare',
+    'CIRCLPDNS'
+]
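The new `__all__` makes the module's re-export surface explicit: star-imports are now bounded to exactly the listed names, and the intent behind the per-line `# noqa` markers (kept for the linter) is documented in one place. Illustration, assuming the package is importable:

    from lookyloo.modules import *  # imports only the names listed in __all__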
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json

 from datetime import date
 from typing import Dict, List, Optional, TYPE_CHECKING
 from urllib.parse import urlparse

-from pypdns import PyPDNS, PDNSRecord
+from pypdns import PyPDNS, PDNSRecord # type: ignore[attr-defined]

 from ..default import ConfigError, get_homedir
 from ..helpers import get_cache_directory

@@ -32,7 +34,7 @@ class CIRCLPDNS(AbstractModule):
         self.storage_dir_pypdns.mkdir(parents=True, exist_ok=True)
         return True

-    def get_passivedns(self, query: str) -> Optional[List[PDNSRecord]]:
+    def get_passivedns(self, query: str) -> list[PDNSRecord] | None:
         # The query can be IP or Hostname. For now, we only do it on domains.
         url_storage_dir = get_cache_directory(self.storage_dir_pypdns, query, 'pdns')
         if not url_storage_dir.exists():

@@ -44,7 +46,7 @@ class CIRCLPDNS(AbstractModule):
         with cached_entries[0].open() as f:
             return [PDNSRecord(record) for record in json.load(f)]

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3

-from typing import Dict
+from __future__ import annotations
+
+from typing import Dict, Any

 import requests

@@ -29,7 +31,7 @@ class FOX(AbstractModule):

         return True

-    def capture_default_trigger(self, url: str, /, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, url: str, /, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on the initial URL'''
         if not self.available:
             return {'error': 'Module not available'}

@@ -52,7 +54,7 @@ class FOX(AbstractModule):
         response.raise_for_status()
         return True

-    def url_submit(self, url: str) -> Dict:
+    def url_submit(self, url: str) -> dict[str, Any]:
         '''Submit a URL to FOX
         '''
         if not self.available:
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 from typing import Dict, List

-from har2tree import CrawledTree
-from pyhashlookup import Hashlookup
+from har2tree import CrawledTree # type: ignore[attr-defined]
+from pyhashlookup import Hashlookup # type: ignore[attr-defined]

 from ..default import ConfigError
 from ..helpers import get_useragent_for_requests

@@ -31,7 +33,7 @@ class HashlookupModule(AbstractModule):
         self.allow_auto_trigger = bool(self.config.get('allow_auto_trigger', False))
         return True

-    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}

@@ -52,14 +54,14 @@ class HashlookupModule(AbstractModule):

         return {'success': 'Module triggered'}

-    def hashes_lookup(self, hashes: List[str]) -> Dict[str, Dict[str, str]]:
+    def hashes_lookup(self, hashes: list[str]) -> dict[str, dict[str, str]]:
         '''Lookup a list of hashes against Hashlookup
         Note: It will trigger a request to hashlookup every time *until* there is a hit, then once a day.
         '''
         if not self.available:
             raise ConfigError('Hashlookup not available, probably not enabled.')

-        to_return: Dict[str, Dict[str, str]] = {}
+        to_return: dict[str, dict[str, str]] = {}
         for entry in self.client.sha1_bulk_lookup(hashes):
             if 'SHA-1' in entry:
                 to_return[entry['SHA-1'].lower()] = entry
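hashes_lookup feeds SHA-1 hex digests to pyhashlookup's bulk endpoint and keys hits by the lowercased 'SHA-1' field, skipping entries that lack one (misses). A hedged usage sketch, assuming the client defaults to the public CIRCL instance:

    from pyhashlookup import Hashlookup

    client = Hashlookup()  # as created in module_init, assuming default root URL
    sha1s = ['da39a3ee5e6b4b0d3255bfef95601890afd80709']  # SHA-1 of an empty file
    hits = {e['SHA-1'].lower(): e
            for e in client.sha1_bulk_lookup(sha1s) if 'SHA-1' in e}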
@@ -5,12 +5,12 @@ import re
 from io import BytesIO
 from collections import defaultdict
 from collections.abc import Mapping
-from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING, Iterator

 import requests
-from har2tree import HostNode, URLNode, Har2TreeError
-from pymisp import MISPAttribute, MISPEvent, PyMISP
-from pymisp.tools import FileObject, URLObject
+from har2tree import HostNode, URLNode, Har2TreeError # type: ignore[attr-defined]
+from pymisp import MISPAttribute, MISPEvent, PyMISP, MISPTag # type: ignore[attr-defined]
+from pymisp.tools import FileObject, URLObject # type: ignore[attr-defined]

 from ..default import get_config, get_homedir
 from ..helpers import get_public_suffix_list

@@ -21,7 +21,7 @@ if TYPE_CHECKING:
     from ..capturecache import CaptureCache


-class MISPs(Mapping, AbstractModule):
+class MISPs(Mapping, AbstractModule): # type: ignore[type-arg]

     def module_init(self) -> bool:
         if not self.config.get('default'):

@@ -37,7 +37,7 @@ class MISPs(Mapping, AbstractModule):
             self.logger.warning(f"The default MISP instance ({self.default_instance}) is missing in the instances ({', '.join(self.config['instances'].keys())}), disabling MISP.")
             return False

-        self.__misps: Dict[str, 'MISP'] = {}
+        self.__misps = {}
         for instance_name, instance_config in self.config['instances'].items():
             if misp_connector := MISP(config=instance_config):
                 if misp_connector.available:

@@ -56,10 +56,10 @@ class MISPs(Mapping, AbstractModule):
     def __getitem__(self, name: str) -> 'MISP':
         return self.__misps[name]

-    def __iter__(self):
+    def __iter__(self) -> Iterator[dict[str, 'MISP']]:
         return iter(self.__misps)

-    def __len__(self):
+    def __len__(self) -> int:
         return len(self.__misps)

     @property

@@ -170,10 +170,10 @@ class MISP(AbstractModule):
         self.psl = get_public_suffix_list()
         return True

-    def get_fav_tags(self):
+    def get_fav_tags(self) -> dict[Any, Any] | list[MISPTag]:
         return self.client.tags(pythonify=True, favouritesOnly=1)

-    def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=False) -> Union[List[MISPEvent], Dict]:
+    def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=False) -> Union[List[MISPEvent], Dict[str, str]]:
         '''Adds the pre-configured information as required by the instance.
         If duplicates aren't allowed, they will be automatically skiped and the
         extends_uuid key in the next element in the list updated'''

@@ -196,11 +196,11 @@ class MISP(AbstractModule):
             for tag in self.default_tags:
                 event.add_tag(tag)
             if auto_publish:
-                event.publish()
+                event.publish() # type: ignore[no-untyped-call]
             events_to_push.append(event)
         return events_to_push

-    def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=None) -> Union[List[MISPEvent], Dict]:
+    def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=None) -> Union[List[MISPEvent], Dict[Any, Any]]:
         if auto_publish is None:
             auto_publish = self.auto_publish
         if self.available and self.enable_push:
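Because MISPs subclasses collections.abc.Mapping, only __getitem__, __iter__, and __len__ need to be provided; keys(), items(), get(), and the `in` operator come for free. (Strictly speaking, iterating a Mapping yields its keys, so `Iterator[str]` would be a tighter return annotation for __iter__ than the `Iterator[dict[str, 'MISP']]` used above.) A minimal sketch of the protocol:

    from collections.abc import Mapping
    from typing import Iterator

    class Registry(Mapping):
        def __init__(self) -> None:
            self._instances: dict[str, object] = {}

        def __getitem__(self, name: str) -> object:
            return self._instances[name]

        def __iter__(self) -> Iterator[str]:  # a Mapping iterates over its keys
            return iter(self._instances)

        def __len__(self) -> int:
            return len(self._instances)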
@@ -1,9 +1,11 @@
 #!/usr/bin/env python3

-from io import BytesIO
-from typing import Dict
+from __future__ import annotations

-from pypandora import PyPandora
+from io import BytesIO
+from typing import Dict, Any
+
+from pypandora import PyPandora # type: ignore[attr-defined]

 from ..default import ConfigError
 from ..helpers import get_useragent_for_requests

@@ -27,7 +29,7 @@ class Pandora(AbstractModule):

         return True

-    def capture_default_trigger(self, file_in_memory: BytesIO, filename: str, /, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, file_in_memory: BytesIO, filename: str, /, auto_trigger: bool=False) -> dict[str, str]:
         '''Automatically submit the file if the landing URL is a file instead of a webpage'''
         if not self.available:
             return {'error': 'Module not available'}

@@ -39,7 +41,7 @@ class Pandora(AbstractModule):
         self.submit_file(file_in_memory, filename)
         return {'success': 'Module triggered'}

-    def submit_file(self, file_in_memory: BytesIO, filename: str) -> Dict:
+    def submit_file(self, file_in_memory: BytesIO, filename: str) -> dict[str, Any]:
         '''Submit a file to Pandora'''
         if not self.available:
             raise ConfigError('Pandora not available, probably not able to reach the server.')
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json

 from datetime import date, datetime, timedelta, timezone
 from typing import Any, Dict, Optional, List, TYPE_CHECKING

-from pyphishtanklookup import PhishtankLookup
+from pyphishtanklookup import PhishtankLookup # type: ignore[attr-defined]

 from ..default import ConfigError, get_homedir
 from ..helpers import get_cache_directory

@@ -38,7 +40,7 @@ class Phishtank(AbstractModule):
         self.storage_dir_pt.mkdir(parents=True, exist_ok=True)
         return True

-    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
+    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
         url_storage_dir = get_cache_directory(self.storage_dir_pt, url, 'url')
         if not url_storage_dir.exists():
             return None

@@ -49,10 +51,10 @@ class Phishtank(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def lookup_ips_capture(self, cache: 'CaptureCache') -> Dict[str, List[Dict[str, Any]]]:
+    def lookup_ips_capture(self, cache: CaptureCache) -> dict[str, list[dict[str, Any]]]:
         with (cache.capture_dir / 'ips.json').open() as f:
             ips_dump = json.load(f)
-        to_return: Dict[str, List[Dict[str, Any]]] = {}
+        to_return: dict[str, list[dict[str, Any]]] = {}
         for ip in {ip for ips_list in ips_dump.values() for ip in ips_list}:
             entry = self.get_ip_lookup(ip)
             if not entry:

@@ -64,7 +66,7 @@ class Phishtank(AbstractModule):
             to_return[ip].append(entry)
         return to_return

-    def get_ip_lookup(self, ip: str) -> Optional[Dict[str, Any]]:
+    def get_ip_lookup(self, ip: str) -> dict[str, Any] | None:
         ip_storage_dir = get_cache_directory(self.storage_dir_pt, ip, 'ip')
         if not ip_storage_dir.exists():
             return None

@@ -75,7 +77,7 @@ class Phishtank(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -1,12 +1,14 @@
#!/usr/bin/env python3

from __future__ import annotations

import json
import time

from datetime import date
from typing import Any, Dict, Optional, TYPE_CHECKING

from pyeupi import PyEUPI
from pyeupi import PyEUPI  # type: ignore[attr-defined]

from ..default import ConfigError, get_homedir
from ..helpers import get_cache_directory

@@ -34,7 +36,7 @@ class PhishingInitiative(AbstractModule):
        self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)
        return True

    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
        url_storage_dir = get_cache_directory(self.storage_dir_eupi, url)
        if not url_storage_dir.exists():
            return None

@@ -45,7 +47,7 @@ class PhishingInitiative(AbstractModule):
        with cached_entries[0].open() as f:
            return json.load(f)

    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
        '''Run the module on all the nodes up to the final redirect'''
        if not self.available:
            return {'error': 'Module not available'}

@@ -1,5 +1,7 @@
#!/usr/bin/env python3

from __future__ import annotations

import json

from datetime import date, datetime, timedelta

@@ -56,7 +58,7 @@ class RiskIQ(AbstractModule):
        self.storage_dir_riskiq.mkdir(parents=True, exist_ok=True)
        return True

    def get_passivedns(self, query: str) -> Optional[Dict[str, Any]]:
    def get_passivedns(self, query: str) -> dict[str, Any] | None:
        # The query can be IP or Hostname. For now, we only do it on domains.
        url_storage_dir = get_cache_directory(self.storage_dir_riskiq, query, 'pdns')
        if not url_storage_dir.exists():

@@ -68,7 +70,7 @@ class RiskIQ(AbstractModule):
        with cached_entries[0].open() as f:
            return json.load(f)

    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
        '''Run the module on all the nodes up to the final redirect'''
        if not self.available:
            return {'error': 'Module not available'}

@@ -88,7 +90,7 @@ class RiskIQ(AbstractModule):
        self.pdns_lookup(hostname, force)
        return {'success': 'Module triggered'}

    def pdns_lookup(self, hostname: str, force: bool=False, first_seen: Optional[Union[date, datetime]]=None) -> None:
    def pdns_lookup(self, hostname: str, force: bool=False, first_seen: date | datetime | None=None) -> None:
        '''Lookup an hostname on RiskIQ Passive DNS
        Note: force means re-fetch the entry RiskIQ even if we already did it today
        '''

@@ -1,10 +1,12 @@
#!/usr/bin/env python3

from __future__ import annotations

import json
from datetime import date
from typing import Dict, Iterable, List, Union

from pysanejs import SaneJS
from pysanejs import SaneJS  # type: ignore[attr-defined]

from ..default import get_homedir

@@ -29,7 +31,7 @@ class SaneJavaScript(AbstractModule):
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        return True

    def hashes_lookup(self, sha512: Union[Iterable[str], str], force: bool=False) -> Dict[str, List[str]]:
    def hashes_lookup(self, sha512: Iterable[str] | str, force: bool=False) -> dict[str, list[str]]:
        if isinstance(sha512, str):
            hashes: Iterable[str] = [sha512]
        else:

@@ -43,7 +45,7 @@ class SaneJavaScript(AbstractModule):
        with sanejs_unknowns.open() as f:
            unknown_hashes = {line.strip() for line in f.readlines()}

        to_return: Dict[str, List[str]] = {}
        to_return: dict[str, list[str]] = {}

        if force:
            to_lookup = hashes

@@ -1,5 +1,7 @@
#!/usr/bin/env python3

from __future__ import annotations

import json
from datetime import date
from typing import Any, Dict, Optional, TYPE_CHECKING

@@ -29,7 +31,7 @@ class URLhaus(AbstractModule):
        self.storage_dir_uh.mkdir(parents=True, exist_ok=True)
        return True

    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
        url_storage_dir = get_cache_directory(self.storage_dir_uh, url, 'url')
        if not url_storage_dir.exists():
            return None

@@ -40,13 +42,13 @@ class URLhaus(AbstractModule):
        with cached_entries[0].open() as f:
            return json.load(f)

    def __url_result(self, url: str) -> Dict:
    def __url_result(self, url: str) -> dict[str, Any]:
        data = {'url': url}
        response = requests.post(f'{self.url}/url/', data)
        response.raise_for_status()
        return response.json()

    def capture_default_trigger(self, cache: 'CaptureCache', /, *, auto_trigger: bool=False) -> Dict:
    def capture_default_trigger(self, cache: CaptureCache, /, *, auto_trigger: bool=False) -> dict[str, str]:
        '''Run the module on all the nodes up to the final redirect'''
        if not self.available:
            return {'error': 'Module not available'}

@@ -1,5 +1,7 @@
#!/usr/bin/env python3

from __future__ import annotations

import json
from datetime import date
from typing import Any, Dict, Optional, TYPE_CHECKING

@@ -47,7 +49,7 @@ class UrlScan(AbstractModule):
        self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True)
        return True

    def get_url_submission(self, capture_info: 'CaptureCache') -> Dict[str, Any]:
    def get_url_submission(self, capture_info: CaptureCache) -> dict[str, Any]:
        url_storage_dir = get_cache_directory(
            self.storage_dir_urlscan,
            f'{capture_info.url}{capture_info.user_agent}{capture_info.referer}',

@@ -61,7 +63,7 @@ class UrlScan(AbstractModule):
        with cached_entries[0].open() as f:
            return json.load(f)

    def capture_default_trigger(self, capture_info: 'CaptureCache', /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> Dict:
    def capture_default_trigger(self, capture_info: CaptureCache, /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
        '''Run the module on the initial URL'''
        if not self.available:
            return {'error': 'Module not available'}

@@ -75,7 +77,7 @@ class UrlScan(AbstractModule):
        self.url_submit(capture_info, visibility, force)
        return {'success': 'Module triggered'}

    def __submit_url(self, url: str, useragent: Optional[str], referer: Optional[str], visibility: str) -> Dict:
    def __submit_url(self, url: str, useragent: str | None, referer: str | None, visibility: str) -> dict[str, Any]:
        data = {'customagent': useragent if useragent else '', 'referer': referer if referer else ''}

        if not url.startswith('http'):

@@ -96,12 +98,12 @@ class UrlScan(AbstractModule):
        response.raise_for_status()
        return response.json()

    def __url_result(self, uuid: str) -> Dict:
    def __url_result(self, uuid: str) -> dict[str, Any]:
        response = self.client.get(f'https://urlscan.io/api/v1/result/{uuid}')
        response.raise_for_status()
        return response.json()

    def url_submit(self, capture_info: 'CaptureCache', visibility: str, force: bool=False) -> Dict:
    def url_submit(self, capture_info: CaptureCache, visibility: str, force: bool=False) -> dict[str, Any]:
        '''Lookup an URL on urlscan.io
        Note: force means 2 things:
        * (re)scan of the URL

@@ -142,7 +144,7 @@ class UrlScan(AbstractModule):
            return response
        return {'error': 'Submitting is not allowed by the configuration'}

    def url_result(self, capture_info: 'CaptureCache'):
    def url_result(self, capture_info: CaptureCache) -> dict[str, Any]:
        '''Get the result from a submission.'''
        submission = self.get_url_submission(capture_info)
        if submission and 'uuid' in submission:

@@ -1,11 +1,13 @@
#!/usr/bin/env python3

from __future__ import annotations

import re
import socket

from typing import overload, Literal, List, Union

from har2tree import CrawledTree, Har2TreeError, HostNode
from har2tree import CrawledTree, Har2TreeError, HostNode  # type: ignore[attr-defined]

from .abstractmodule import AbstractModule

@@ -62,7 +64,7 @@ class UniversalWhois(AbstractModule):
            self.query_whois_hostnode(n)

    @overload
    def whois(self, query: str, contact_email_only: Literal[True]) -> List[str]:
    def whois(self, query: str, contact_email_only: Literal[True]) -> list[str]:
        ...

    @overload

@@ -70,10 +72,10 @@ class UniversalWhois(AbstractModule):
        ...

    @overload
    def whois(self, query: str, contact_email_only: bool=False) -> Union[str, List[str]]:
    def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
        ...

    def whois(self, query: str, contact_email_only: bool=False) -> Union[str, List[str]]:
    def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
        if not self.available:
            return ''
        bytes_whois = b''

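The `whois()` overloads above let mypy narrow the return type from the `contact_email_only` flag: `Literal[True]` yields `list[str]`, otherwise a plain `str`. A standalone sketch of the same device; the body is a stand-in, not the module's real socket-based lookup:

from __future__ import annotations

from typing import Literal, overload


@overload
def whois(query: str, contact_email_only: Literal[True]) -> list[str]: ...
@overload
def whois(query: str, contact_email_only: Literal[False] = False) -> str: ...
@overload
def whois(query: str, contact_email_only: bool = False) -> str | list[str]: ...


def whois(query: str, contact_email_only: bool = False) -> str | list[str]:
    # One runtime implementation; the @overload stubs only guide the type checker.
    raw = f'registrar: example  contact: abuse@example.net  query: {query}'
    if contact_email_only:
        return [token for token in raw.split() if '@' in token]
    return raw
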
@@ -1,5 +1,7 @@
#!/usr/bin/env python3

from __future__ import annotations

import json
import time
from datetime import date

@@ -18,9 +20,10 @@ if TYPE_CHECKING:
from .abstractmodule import AbstractModule


def jsonify_vt(obj: WhistleBlowerDict):
def jsonify_vt(obj: WhistleBlowerDict) -> dict[str, Any]:
    if isinstance(obj, WhistleBlowerDict):
        return {k: v for k, v in obj.items()}
    return obj


class VirusTotal(AbstractModule):

@@ -39,7 +42,7 @@ class VirusTotal(AbstractModule):
        self.storage_dir_vt.mkdir(parents=True, exist_ok=True)
        return True

    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
        url_storage_dir = get_cache_directory(self.storage_dir_vt, vt.url_id(url))
        if not url_storage_dir.exists():
            return None

@@ -54,7 +57,7 @@ class VirusTotal(AbstractModule):
            cached_entries[0].unlink(missing_ok=True)
            return None

    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
        '''Run the module on all the nodes up to the final redirect'''
        if not self.available:
            return {'error': 'Module not available'}

@@ -0,0 +1,8 @@
[mypy]
strict = True
warn_return_any = False
show_error_context = True
pretty = True

[mypy-docs.source.*]
ignore_errors = True

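For reference, `strict = True` in the new mypy.ini is shorthand for a bundle of individual checks; the list below is an approximation (the exact set depends on the mypy release, see `mypy --help`). That bundle is also why the hand-maintained `[tool.mypy]` flag list can be dropped from pyproject.toml further down, keeping `warn_return_any = False` as the single opt-out:

[mypy]
# Roughly what "strict = True" turns on (non-exhaustive, release-dependent):
warn_unused_configs = True
disallow_any_generics = True
disallow_untyped_defs = True
disallow_incomplete_defs = True
check_untyped_defs = True
disallow_untyped_decorators = True
no_implicit_optional = True
warn_redundant_casts = True
warn_unused_ignores = True
warn_return_any = True
no_implicit_reexport = True
strict_equality = True
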
@@ -1447,18 +1447,18 @@ referencing = ">=0.31.0"

[[package]]
name = "lacuscore"
version = "1.7.8"
version = "1.7.9"
description = "Core of Lacus, usable as a module"
optional = false
python-versions = ">=3.8,<4.0"
files = [
    {file = "lacuscore-1.7.8-py3-none-any.whl", hash = "sha256:b877567a7efb35802c5fb6a01a8b88602978c16b49ee0ceead937337c6710081"},
    {file = "lacuscore-1.7.8.tar.gz", hash = "sha256:e0aa938a6555c8fe8485777e04c2ca549cd3b1fd7a75e7839d49a3fef1499252"},
    {file = "lacuscore-1.7.9-py3-none-any.whl", hash = "sha256:74309aa4216fabffadd4ab724f8f2273d12e59dedd8e826e2710847d92497f8c"},
    {file = "lacuscore-1.7.9.tar.gz", hash = "sha256:cb0df82d88ffe805fc78c60e535ee54d82842b763a84ad97cfc2a5a99d4c3ed7"},
]

[package.dependencies]
defang = ">=0.5.3,<0.6.0"
playwrightcapture = {version = ">=1.22.5,<2.0.0", extras = ["recaptcha"]}
playwrightcapture = {version = ">=1.22.6,<2.0.0", extras = ["recaptcha"]}
redis = {version = ">=5.0.1,<6.0.0", extras = ["hiredis"]}
requests = ">=2.31.0,<3.0.0"
ua-parser = ">=0.18.0,<0.19.0"

@@ -2154,13 +2154,13 @@ test = ["pytest"]

[[package]]
name = "playwrightcapture"
version = "1.22.5"
version = "1.22.6"
description = "A simple library to capture websites using playwright"
optional = false
python-versions = ">=3.8,<4.0"
files = [
    {file = "playwrightcapture-1.22.5-py3-none-any.whl", hash = "sha256:023d394efe2c6173178ac7a9143a9b77400704b965280c494e9bb418eaa2ea86"},
    {file = "playwrightcapture-1.22.5.tar.gz", hash = "sha256:8fac3bf723536ebc6ff0e1908aa838029a8b6e8ed1998fd162d5557d1d3fb2ec"},
    {file = "playwrightcapture-1.22.6-py3-none-any.whl", hash = "sha256:910ad4dabbc51864f1c8fed6e62c2869a519211bcf7ae6e9c5aac3ea29268e33"},
    {file = "playwrightcapture-1.22.6.tar.gz", hash = "sha256:b5c377585aba9ff71f055127b6be86458503ff3308e8fc8225dd4c05ab9597ae"},
]

[package.dependencies]

@@ -2173,7 +2173,7 @@ pytz = {version = ">=2023.3.post1,<2024.0", markers = "python_version < \"3.9\""
requests = {version = ">=2.31.0,<3.0.0", extras = ["socks"], optional = true, markers = "extra == \"recaptcha\""}
setuptools = ">=69.0.3,<70.0.0"
SpeechRecognition = {version = ">=3.10.1,<4.0.0", optional = true, markers = "extra == \"recaptcha\""}
tzdata = ">=2023.3,<2024.0"
tzdata = ">=2023.4,<2024.0"
w3lib = ">=2.1.2,<3.0.0"

[package.extras]

@@ -3592,4 +3592,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<3.12"
content-hash = "9e6afc44fccf8789e1968b698fc9a6632bfb7fb5d053a404356000386d1fd3ad"
content-hash = "95ea92c4f809ea280840866efc4385f75bbb4c7ace7cb9ac4979c17df722fd02"

@@ -65,7 +65,7 @@ passivetotal = "^2.5.9"
werkzeug = "^3.0.1"
filetype = "^1.2.0"
pypandora = "^1.6.1"
lacuscore = "^1.7.8"
lacuscore = "^1.7.9"
pylacus = "^1.7.1"
pyipasnhistory = "^2.1.2"
publicsuffixlist = "^0.10.0.20231214"

@@ -103,17 +103,3 @@ types-pytz = "^2023.3.1.1"
[build-system]
requires = ["poetry_core"]
build-backend = "poetry.core.masonry.api"

[tool.mypy]
check_untyped_defs = true
ignore_errors = false
ignore_missing_imports = false
strict_optional = true
no_implicit_optional = true
warn_unused_ignores = true
warn_redundant_casts = true
warn_unused_configs = true
warn_unreachable = true

show_error_context = true
pretty = true

@@ -9,7 +9,7 @@ from lookyloo.default import safe_create_dir, get_socket_path
from lookyloo.helpers import get_captures_dir


def rename_captures():
def rename_captures() -> None:
    r = Redis(unix_socket_path=get_socket_path('cache'))
    capture_dir: Path = get_captures_dir()
    for uuid_path in capture_dir.glob('*/uuid'):

@@ -9,7 +9,7 @@ import s3fs  # type: ignore
from lookyloo.default import get_config


def check_path(path: str):
def check_path(path: str) -> dict[str, str]:
    s3fs_config = get_config('generic', 's3fs')
    s3fs_client = s3fs.S3FileSystem(key=s3fs_config['config']['key'],
                                    secret=s3fs_config['config']['secret'],

@@ -4,14 +4,14 @@ import base64
import hashlib
import json

from typing import Dict
from typing import Dict, Any

from lookyloo.default import get_homedir

if __name__ == '__main__':
    dest_dir = get_homedir() / 'website' / 'web'

    to_save: Dict = {'static': {}}
    to_save: Dict[str, Any] = {'static': {}}

    for resource in (dest_dir / 'static').glob('*'):
        if resource.name[0] == '.':

@@ -73,7 +73,7 @@ def ua_parser(html_content: str) -> Dict[str, Any]:
    return to_store


def main():
def main() -> None:
    to_parse = Path('Most Common User Agents - Tech Blog (wh).html')

    today = datetime.now()

@@ -1,9 +1,11 @@
#!/usr/bin/env python3

from __future__ import annotations

import os
import sys

from typing import List, Tuple
from typing import List, Tuple, Any

from redis import Redis
from redis.exceptions import ConnectionError

@@ -21,11 +23,11 @@ console = Console(color_system="256")
class Monitoring():

    def __init__(self) -> None:
        self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
        self.redis_indexing: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)
        self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)  # type: ignore[type-arg]
        self.redis_indexing: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)  # type: ignore[type-arg]

    @property
    def backend_status(self):
    def backend_status(self) -> bool:
        socket_path_cache = get_socket_path('cache')
        socket_path_index = get_socket_path('indexing')
        backend_up = True

@@ -56,12 +58,12 @@ class Monitoring():
        return backend_up

    @property
    def queues(self):
    def queues(self) -> list[tuple[str, float]]:
        return self.redis_cache.zrevrangebyscore('queues', 'Inf', '-Inf', withscores=True)

    @property
    def ongoing_captures(self):
        captures_uuid: List[Tuple[str, float]] = self.redis_cache.zrevrangebyscore('to_capture', 'Inf', '-Inf', withscores=True)
    def ongoing_captures(self) -> list[tuple[str, float, dict[str, Any]]]:
        captures_uuid: list[tuple[str, float]] = self.redis_cache.zrevrangebyscore('to_capture', 'Inf', '-Inf', withscores=True)
        if not captures_uuid:
            return []
        to_return = []

@@ -75,7 +77,7 @@ class Monitoring():
        return to_return

    @property
    def tree_cache(self):
    def tree_cache(self) -> dict[str, str]:
        to_return = {}
        for pid_name, value in self.redis_cache.hgetall('tree_cache').items():
            pid, name = pid_name.split('|', 1)

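About the `# type: ignore[type-arg]` on the Redis clients above: under the stubs strict mypy consults, `Redis` is generic over the response type (`Redis[str]` with `decode_responses=True`, `Redis[bytes]` otherwise). A possible fully parameterized alternative, assuming those stubs model the constructor this way; the socket path is a placeholder, and with lazy annotations the subscript is never evaluated on the Python 3.8 runtime:

from __future__ import annotations

from redis import Redis


class Monitoring:
    def __init__(self) -> None:
        # Parameterize instead of ignoring: under PEP 563 the annotation is a
        # string, so Redis[str] never has to be subscriptable at runtime.
        self.redis_cache: Redis[str] = Redis(unix_socket_path='cache.sock',  # placeholder path
                                             decode_responses=True)
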
@@ -4,14 +4,14 @@ import csv
import argparse
import logging

from lookyloo.lookyloo import Indexing, Lookyloo
from lookyloo import Indexing, Lookyloo
from lookyloo.helpers import get_captures_dir

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO)


def main():
def main() -> None:
    parser = argparse.ArgumentParser(description='Rebuild the redis cache.')
    parser.add_argument('--rebuild_pickles', default=False, action='store_true', help='Delete and rebuild the pickles. Count 20s/pickle, it can take a very long time.')
    args = parser.parse_args()

@@ -30,7 +30,7 @@ def main():
    with index.open('r') as _f:
        recent_uuids = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
    if recent_uuids:
        lookyloo.redis.hset('lookup_dirs', mapping=recent_uuids)
        lookyloo.redis.hset('lookup_dirs', mapping=recent_uuids)  # type: ignore[arg-type]

    # This call will rebuild all the caches as needed.
    lookyloo.sorted_capture_cache()

@@ -1,8 +1,8 @@
from lookyloo.lookyloo import Lookyloo
from lookyloo import Lookyloo
import calendar
import datetime
from urllib.parse import urlparse
from typing import Dict, Any, Union, Set
from typing import Dict, Any, Union, Set, List

lookyloo = Lookyloo()


@@ -15,10 +15,11 @@ weeks_stats: Dict[int, Dict[str, Union[int, Set[str]]]] = \
    calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}}


def uniq_domains(uniq_urls):
def uniq_domains(uniq_urls: List[str]) -> Set[str]:
    domains = set()
    for url in uniq_urls:
        splitted = urlparse(url)
        if splitted.hostname:
            domains.add(splitted.hostname)
    return domains


@@ -50,8 +51,8 @@ for week_number, week_stat in weeks_stats.items():
    print(' Number of analysis with redirects:', week_stat['analysis_with_redirects'])
    print(' Number of redirects:', week_stat['redirects'])
    print(' Number of unique URLs:', len(week_stat['uniq_urls']))  # type: ignore
    domains = uniq_domains(week_stat['uniq_urls'])
    print(' Number of unique domains:', len(domains))
    d = uniq_domains(week_stat['uniq_urls'])  # type: ignore[arg-type]
    print(' Number of unique domains:', len(d))


for year, data in stats.items():

@@ -7,7 +7,7 @@ import argparse
from lookyloo.default import get_homedir


def validate_generic_config_file():
def validate_generic_config_file() -> bool:
    sample_config = get_homedir() / 'config' / 'generic.json.sample'
    with sample_config.open() as f:
        generic_config_sample = json.load(f)

@@ -53,7 +53,7 @@ def validate_generic_config_file():
    return True


def validate_modules_config_file():
def validate_modules_config_file() -> bool:
    with (get_homedir() / 'config' / 'modules.json').open() as f:
        modules_config = json.load(f)
    with (get_homedir() / 'config' / 'modules.json.sample').open() as f:

@@ -69,7 +69,7 @@ def validate_modules_config_file():
    return True


def update_user_configs():
def update_user_configs() -> bool:
    for file_name in ['generic', 'modules']:
        with (get_homedir() / 'config' / f'{file_name}.json').open() as f:
            try:

@@ -1,5 +1,7 @@
#!/usr/bin/env python3

from __future__ import annotations

import base64
import calendar
import functools

@@ -22,14 +24,15 @@ from uuid import uuid4
from zipfile import ZipFile

import flask_login  # type: ignore
from flask import (Flask, Response, flash, jsonify, redirect, render_template,
from flask import (Flask, Response, Request, flash, jsonify, redirect, render_template,
                   request, send_file, url_for)
from flask_bootstrap import Bootstrap5  # type: ignore
from flask_cors import CORS  # type: ignore
from flask_restx import Api  # type: ignore
from lacuscore import CaptureStatus
from pymisp import MISPEvent, MISPServerError
from pymisp import MISPEvent, MISPServerError  # type: ignore[attr-defined]
from werkzeug.security import check_password_hash
from werkzeug.wrappers.response import Response as WerkzeugResponse

from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile

@@ -71,8 +74,8 @@ login_manager.init_app(app)
user_agents = UserAgents()


@login_manager.user_loader
def user_loader(username):
@login_manager.user_loader  # type: ignore[misc]
def user_loader(username: str) -> User | None:
    if username not in build_users_table():
        return None
    user = User()

@@ -80,13 +83,13 @@ def user_loader(username):
    return user


@login_manager.request_loader
def _load_user_from_request(request):
@login_manager.request_loader  # type: ignore[misc]
def _load_user_from_request(request: Request) -> User | None:
    return load_user_from_request(request)


@app.route('/login', methods=['GET', 'POST'])
def login():
def login() -> WerkzeugResponse | str | Response:
    if request.method == 'GET':
        return '''
               <form action='login' method='POST'>

@@ -110,8 +113,8 @@ def login():


@app.route('/logout')
@flask_login.login_required
def logout():
@flask_login.login_required  # type: ignore[misc]
def logout() -> WerkzeugResponse:
    flask_login.logout_user()
    flash('Successfully logged out.', 'success')
    return redirect(url_for('index'))

@@ -141,7 +144,7 @@ hide_captures_with_error = get_config('generic', 'hide_captures_with_error')

# Method to make sizes in bytes human readable
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
def sizeof_fmt(num: float, suffix: str='B') -> str:
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return f"{num:3.1f}{unit}{suffix}"

@@ -152,7 +155,7 @@ def sizeof_fmt(num, suffix='B'):
app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)


def http_status_description(code: int):
def http_status_description(code: int) -> str:
    if code in http.client.responses:
        return http.client.responses[code]
    return f'Invalid code: {code}'

@@ -161,7 +164,7 @@ def http_status_description(code: int):
app.jinja_env.globals.update(http_status_description=http_status_description)


def month_name(month: int):
def month_name(month: int) -> str:
    return calendar.month_name[month]


@@ -181,8 +184,8 @@ class Icon(TypedDict):
    tooltip: str


def get_icon(icon_id: str) -> Optional[Icon]:
    available_icons: Dict[str, Icon] = {
def get_icon(icon_id: str) -> Icon | None:
    available_icons: dict[str, Icon] = {
        'js': {'icon': "javascript.png", 'tooltip': 'The content of the response is a javascript'},
        'exe': {'icon': "exe.png", 'tooltip': 'The content of the response is an executable'},
        'css': {'icon': "css.png", 'tooltip': 'The content of the response is a CSS'},

@@ -208,7 +211,7 @@ def get_icon(icon_id: str) -> Optional[Icon]:
app.jinja_env.globals.update(get_icon=get_icon)


def get_tz_info() -> Tuple[Optional[str], str, Set[str]]:
def get_tz_info() -> tuple[str | None, str, set[str]]:
    now = datetime.now().astimezone()
    local_TZ = now.tzname()
    local_UTC_offset = f'UTC{now.strftime("%z")}'

@@ -221,7 +224,7 @@ app.jinja_env.globals.update(tz_info=get_tz_info)
# ##### Generic/configuration methods #####

@app.after_request
def after_request(response):
def after_request(response: Response) -> Response:
    if use_own_ua:
        # We keep a list user agents in order to build a list to use in the capture
        # interface: this is the easiest way to have something up to date.

@@ -241,9 +244,9 @@ def after_request(response):
    return response


def file_response(func):
def file_response(func):  # type: ignore[no-untyped-def]
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
    def wrapper(*args, **kwargs) -> Response:  # type: ignore[no-untyped-def]
        try:
            return func(*args, **kwargs)
        except NoValidHarFile:

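`file_response` is one of the few places the commit leaves untyped (the `# type: ignore[no-untyped-def]` above): a plain `Callable[..., Response]` annotation would throw away the wrapped view's parameters. A possible fully typed variant using ParamSpec (from typing_extensions, since `typing.ParamSpec` requires Python 3.10); the error handling is reduced to a sketch and is not the project's actual behaviour:

from __future__ import annotations

import functools
from typing import Callable

from flask import Response
from typing_extensions import ParamSpec

P = ParamSpec('P')


def file_response(func: Callable[P, Response]) -> Callable[P, Response]:
    # The decorated view keeps its exact signature, so mypy can still
    # check call sites; no ignores needed.
    @functools.wraps(func)
    def wrapper(*args: P.args, **kwargs: P.kwargs) -> Response:
        try:
            return func(*args, **kwargs)
        except Exception:  # stand-in for NoValidHarFile and friends
            return Response('Unable to generate the file.', mimetype='text/plain', status=404)
    return wrapper
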
@@ -259,23 +262,23 @@ def file_response(func):
# ##### Hostnode level methods #####

@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/hashes', methods=['GET'])
@file_response
def hashes_hostnode(tree_uuid: str, node_uuid: str):
@file_response  # type: ignore[misc]
def hashes_hostnode(tree_uuid: str, node_uuid: str) -> Response:
    hashes = lookyloo.get_hashes(tree_uuid, hostnode_uuid=node_uuid)
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='test/plain', as_attachment=True, download_name=f'hashes.{node_uuid}.txt')


@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/text', methods=['GET'])
@file_response
def urls_hostnode(tree_uuid: str, node_uuid: str):
@file_response  # type: ignore[misc]
def urls_hostnode(tree_uuid: str, node_uuid: str) -> Response:
    hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
    return send_file(BytesIO('\n'.join(url.name for url in hostnode.urls).encode()),
                     mimetype='test/plain', as_attachment=True, download_name=f'urls.{node_uuid}.txt')


@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>', methods=['GET'])
def hostnode_popup(tree_uuid: str, node_uuid: str):
def hostnode_popup(tree_uuid: str, node_uuid: str) -> str | WerkzeugResponse | Response:
    try:
        hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)
    except IndexError:

@@ -294,7 +297,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
# ##### Tree level Methods #####

@app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
def trigger_modules(tree_uuid: str):
def trigger_modules(tree_uuid: str) -> WerkzeugResponse | str | Response:
    force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
    auto_trigger = True if (request.args.get('auto_trigger') and request.args.get('auto_trigger') == 'True') else False
    lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)

@@ -302,7 +305,7 @@ def trigger_modules(tree_uuid: str):

@app.route('/tree/<string:tree_uuid>/historical_lookups', methods=['GET'])
def historical_lookups(tree_uuid: str):
def historical_lookups(tree_uuid: str) -> str | WerkzeugResponse | Response:
    force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
    data = lookyloo.get_historical_lookups(tree_uuid, force)
    return render_template('historical_lookups.html', tree_uuid=tree_uuid,

@@ -312,7 +315,7 @@

@app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''})
@app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
def categories_capture(tree_uuid: str, query: str):
def categories_capture(tree_uuid: str, query: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    current_categories = lookyloo.categories_capture(tree_uuid)

@@ -330,7 +333,7 @@

@app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
def uncategorize_capture(tree_uuid: str, category: str):
def uncategorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.uncategorize_capture(tree_uuid, category)

@@ -339,7 +342,7 @@

@app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
def categorize_capture(tree_uuid: str, category: str):
def categorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.categorize_capture(tree_uuid, category)

@@ -347,19 +350,19 @@

@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
def stats(tree_uuid: str):
def stats(tree_uuid: str) -> str:
    stats = lookyloo.get_statistics(tree_uuid)
    return render_template('statistics.html', uuid=tree_uuid, stats=stats)


@app.route('/tree/<string:tree_uuid>/misp_lookup', methods=['GET'])
@flask_login.login_required
def web_misp_lookup_view(tree_uuid: str):
@flask_login.login_required  # type: ignore[misc]
def web_misp_lookup_view(tree_uuid: str) -> str | WerkzeugResponse | Response:
    if not lookyloo.misps.available:
        flash('There are no MISP instances available.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    misps_occurrences = {}
    for instance_name in lookyloo.misps:
    for instance_name in lookyloo.misps.keys():
        if occurrences := lookyloo.get_misp_occurrences(tree_uuid, instance_name=instance_name):
            misps_occurrences[instance_name] = occurrences
    return render_template('misp_lookup.html', uuid=tree_uuid,

@@ -368,8 +371,8 @@

@app.route('/tree/<string:tree_uuid>/misp_push', methods=['GET', 'POST'])
@flask_login.login_required
def web_misp_push_view(tree_uuid: str):
@flask_login.login_required  # type: ignore[misc]
def web_misp_push_view(tree_uuid: str) -> str | WerkzeugResponse | Response | None:
    if not lookyloo.misps.available:
        flash('There are no MISP instances available.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))

@@ -413,7 +416,7 @@
        # Submit the event
        tags = request.form.getlist('tags')
        error = False
        events: List[MISPEvent] = []
        events: list[MISPEvent] = []
        with_parents = request.form.get('with_parents')
        if with_parents:
            exports = lookyloo.misp_export(tree_uuid, True)

@@ -447,15 +450,16 @@
            for e in new_events:
                flash(f'MISP event {e.id} created on {misp.client.root_url}', 'success')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    return None


@app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
def modules(tree_uuid: str):
def modules(tree_uuid: str) -> str | WerkzeugResponse | Response:
    modules_responses = lookyloo.get_modules_responses(tree_uuid)
    if not modules_responses:
        return redirect(url_for('tree', tree_uuid=tree_uuid))

    vt_short_result: Dict[str, Dict[str, Any]] = {}
    vt_short_result: dict[str, dict[str, Any]] = {}
    if 'vt' in modules_responses:
        # VirusTotal cleanup
        vt = modules_responses.pop('vt')

@@ -471,7 +475,7 @@
            if result['category'] == 'malicious':
                vt_short_result[url]['malicious'].append((vendor, result['result']))

    pi_short_result: Dict[str, str] = {}
    pi_short_result: dict[str, str] = {}
    if 'pi' in modules_responses:
        pi = modules_responses.pop('pi')
        for url, full_report in pi.items():

@@ -479,7 +483,7 @@
                continue
            pi_short_result[url] = full_report['results'][0]['tag_label']

    phishtank_short_result: Dict[str, Dict] = {'urls': {}, 'ips_hits': {}}
    phishtank_short_result: dict[str, dict[str, Any]] = {'urls': {}, 'ips_hits': {}}
    if 'phishtank' in modules_responses:
        pt = modules_responses.pop('phishtank')
        for url, full_report in pt['urls'].items():

@@ -496,7 +500,7 @@
                full_report['url'],
                full_report['phish_detail_url']))

    urlhaus_short_result: Dict[str, List] = {'urls': []}
    urlhaus_short_result: dict[str, list[Any]] = {'urls': []}
    if 'urlhaus' in modules_responses:
        # TODO: make a short result
        uh = modules_responses.pop('urlhaus')

@@ -504,7 +508,7 @@
        if results:
            urlhaus_short_result['urls'].append(results)

    urlscan_to_display: Dict = {}
    urlscan_to_display: dict[str, Any] = {}
    if 'urlscan' in modules_responses and modules_responses.get('urlscan'):
        urlscan = modules_responses.pop('urlscan')
        if 'error' in urlscan['submission']:

@@ -534,8 +538,8 @@

@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
@file_response
def redirects(tree_uuid: str):
@file_response  # type: ignore[misc]
def redirects(tree_uuid: str) -> Response:
    cache = lookyloo.capture_cache(tree_uuid)
    if not cache or not hasattr(cache, 'redirects'):
        return Response('Not available.', mimetype='text/text')

@@ -550,8 +554,8 @@

@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
@file_response
def image(tree_uuid: str):
@file_response  # type: ignore[misc]
def image(tree_uuid: str) -> Response:
    max_width = request.args.get('width')
    if max_width and max_width.isdigit():
        to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, width=int(max_width))

@@ -562,12 +566,11 @@

@app.route('/tree/<string:tree_uuid>/data', methods=['GET'])
@file_response
def data(tree_uuid: str):
@file_response  # type: ignore[misc]
def data(tree_uuid: str) -> Response:
    filename, data = lookyloo.get_data(tree_uuid)
    if len(filename) == 0:
        # TODO: return something saying it is not a valid request
        return
        return Response('No files.', mimetype='text/text')

    if filetype.guess_mime(data.getvalue()) is None:
        mime = 'application/octet-stream'

@@ -579,46 +582,46 @@

@app.route('/tree/<string:tree_uuid>/thumbnail/', defaults={'width': 64}, methods=['GET'])
@app.route('/tree/<string:tree_uuid>/thumbnail/<int:width>', methods=['GET'])
@file_response
def thumbnail(tree_uuid: str, width: int):
@file_response  # type: ignore[misc]
def thumbnail(tree_uuid: str, width: int) -> Response:
    to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=False, width=width)
    return send_file(to_return, mimetype='image/png')


@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
@file_response
def html(tree_uuid: str):
@file_response  # type: ignore[misc]
def html(tree_uuid: str) -> Response:
    to_return = lookyloo.get_html(tree_uuid)
    return send_file(to_return, mimetype='text/html',
                     as_attachment=True, download_name='page.html')


@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
@file_response
def cookies(tree_uuid: str):
@file_response  # type: ignore[misc]
def cookies(tree_uuid: str) -> Response:
    to_return = lookyloo.get_cookies(tree_uuid)
    return send_file(to_return, mimetype='application/json',
                     as_attachment=True, download_name='cookies.json')


@app.route('/tree/<string:tree_uuid>/hashes', methods=['GET'])
@file_response
def hashes_tree(tree_uuid: str):
@file_response  # type: ignore[misc]
def hashes_tree(tree_uuid: str) -> Response:
    hashes = lookyloo.get_hashes(tree_uuid)
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='test/plain', as_attachment=True, download_name='hashes.txt')


@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
@file_response
def export(tree_uuid: str):
@file_response  # type: ignore[misc]
def export(tree_uuid: str) -> Response:
    to_return = lookyloo.get_capture(tree_uuid)
    return send_file(to_return, mimetype='application/zip',
                     as_attachment=True, download_name='capture.zip')


@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
def urls_rendered_page(tree_uuid: str):
def urls_rendered_page(tree_uuid: str) -> WerkzeugResponse | str | Response:
    try:
        urls = lookyloo.get_urls_rendered_page(tree_uuid)
        return render_template('urls_rendered.html', base_tree_uuid=tree_uuid, urls=urls)

@@ -628,7 +631,7 @@ def urls_rendered_page(tree_uuid: str):

@app.route('/tree/<string:tree_uuid>/hashlookup', methods=['GET'])
def hashlookup(tree_uuid: str):
def hashlookup(tree_uuid: str) -> str | WerkzeugResponse | Response:
    merged, total_ressources = lookyloo.merge_hashlookup_tree(tree_uuid)
    # We only want unique URLs for the template
    for sha1, entries in merged.items():

@@ -637,7 +640,7 @@ def hashlookup(tree_uuid: str):

@app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
def bulk_captures(base_tree_uuid: str):
def bulk_captures(base_tree_uuid: str) -> WerkzeugResponse | str | Response:
    if flask_login.current_user.is_authenticated:
        user = flask_login.current_user.get_id()
    else:

@@ -666,16 +669,16 @@

@app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
@flask_login.login_required
def hide_capture(tree_uuid: str):
@flask_login.login_required  # type: ignore[misc]
def hide_capture(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.hide_capture(tree_uuid)
    flash('Successfully hidden.', 'success')
    return redirect(url_for('tree', tree_uuid=tree_uuid))


@app.route('/tree/<string:tree_uuid>/rebuild')
@flask_login.login_required
def rebuild_tree(tree_uuid: str):
@flask_login.login_required  # type: ignore[misc]
def rebuild_tree(tree_uuid: str) -> WerkzeugResponse:
    try:
        lookyloo.remove_pickle(tree_uuid)
        flash('Successfully rebuilt.', 'success')

@@ -685,13 +688,13 @@

@app.route('/tree/<string:tree_uuid>/cache', methods=['GET'])
def cache_tree(tree_uuid: str):
def cache_tree(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.capture_cache(tree_uuid)
    return redirect(url_for('index'))


@app.route('/tree/<string:tree_uuid>/monitor', methods=['POST', 'GET'])
def monitor(tree_uuid: str):
def monitor(tree_uuid: str) -> WerkzeugResponse:
    if not lookyloo.monitoring_enabled:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    if request.form.get('name') or not request.form.get('confirm'):

@@ -702,7 +705,7 @@ def monitor(tree_uuid: str):
    collection: str = request.form['collection'] if request.form.get('collection') else ''
    notification_email: str = request.form['notification'] if request.form.get('notification') else ''
    frequency: str = request.form['frequency'] if request.form.get('frequency') else 'daily'
    expire_at: Optional[float] = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
    expire_at: float | None = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
    cache = lookyloo.capture_cache(tree_uuid)
    if cache:
        monitoring_uuid = lookyloo.monitoring.monitor({'url': cache.url, 'user_agent': cache.user_agent, 'listing': False},

@@ -719,7 +722,7 @@

@app.route('/tree/<string:tree_uuid>/send_mail', methods=['POST', 'GET'])
def send_mail(tree_uuid: str):
def send_mail(tree_uuid: str) -> WerkzeugResponse:
    if not enable_mail_notification:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    if request.form.get('name') or not request.form.get('confirm'):

@@ -739,7 +742,7 @@

@app.route('/tree/<string:tree_uuid>', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
def tree(tree_uuid: str, node_uuid: Optional[str]=None):
def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | WerkzeugResponse:
    if tree_uuid == 'False':
        flash("Unable to process your request.", 'warning')
        return redirect(url_for('index'))

@@ -820,10 +823,10 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):

@app.route('/tree/<string:tree_uuid>/mark_as_legitimate', methods=['POST'])
@flask_login.login_required
def mark_as_legitimate(tree_uuid: str):
@flask_login.login_required  # type: ignore[misc]
def mark_as_legitimate(tree_uuid: str) -> Response:
    if request.data:
        legitimate_entries: Dict = request.get_json(force=True)
        legitimate_entries: dict[str, Any] = request.get_json(force=True)
        lookyloo.add_to_legitimate(tree_uuid, **legitimate_entries)
    else:
        lookyloo.add_to_legitimate(tree_uuid)

@@ -831,13 +834,13 @@ def mark_as_legitimate(tree_uuid: str):

@app.route('/tree/<string:tree_uuid>/body_hashes', methods=['GET'])
def tree_body_hashes(tree_uuid: str):
def tree_body_hashes(tree_uuid: str) -> str:
    body_hashes = lookyloo.get_all_body_hashes(tree_uuid)
    return render_template('tree_body_hashes.html', tree_uuid=tree_uuid, body_hashes=body_hashes)


@app.route('/tree/<string:tree_uuid>/pandora', methods=['GET', 'POST'])
def pandora_submit(tree_uuid: str):
def pandora_submit(tree_uuid: str) -> dict[str, Any] | Response:
    node_uuid = None
    if request.method == 'POST':
        input_json = request.get_json(force=True)

@@ -860,14 +863,14 @@ def pandora_submit(tree_uuid: str):

# ##### helpers #####

def index_generic(show_hidden: bool=False, show_error: bool=True, category: Optional[str]=None):
def index_generic(show_hidden: bool=False, show_error: bool=True, category: str | None=None) -> str:
    """This method is used to generate the index page. It is possible that some of the captures
    do not have their pickle yet.

    We must assume that calling cached.tree will fail, and handle it gracefully.
    """
    titles = []
    cut_time: Optional[datetime] = None
    cut_time: datetime | None = None
    if time_delta_on_index:
        # We want to filter the captures on the index
        cut_time = (datetime.now() - timedelta(**time_delta_on_index))

@@ -899,7 +902,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
                           version=pkg_version)


def get_index_params(request):
def get_index_params(request: Request) -> tuple[bool, str]:
    show_error: bool = True
    category: str = ''
    if hide_captures_with_error:

@@ -913,7 +916,7 @@ def get_index_params(request):
# ##### Index level methods #####

@app.route('/', methods=['GET'])
def index():
def index() -> str:
    if request.method == 'HEAD':
        # Just returns ack if the webserver is running
        return 'Ack'

@@ -922,28 +925,28 @@

@app.route('/hidden', methods=['GET'])
@flask_login.login_required
def index_hidden():
@flask_login.login_required  # type: ignore[misc]
def index_hidden() -> str:
    show_error, category = get_index_params(request)
    return index_generic(show_hidden=True, show_error=show_error, category=category)


@app.route('/cookies', methods=['GET'])
def cookies_lookup():
def cookies_lookup() -> str:
    cookies_names = [(name, freq, lookyloo.indexing.cookies_names_number_domains(name))
                     for name, freq in lookyloo.indexing.cookies_names]
    return render_template('cookies.html', cookies_names=cookies_names)


@app.route('/hhhashes', methods=['GET'])
def hhhashes_lookup():
def hhhashes_lookup() -> str:
    hhhashes = [(hhh, freq, lookyloo.indexing.http_headers_hashes_number_captures(hhh))
                for hhh, freq in lookyloo.indexing.http_headers_hashes]
    return render_template('hhhashes.html', hhhashes=hhhashes)


@app.route('/ressources', methods=['GET'])
def ressources():
def ressources() -> str:
    ressources = []
    for h, freq in lookyloo.indexing.ressources:
        domain_freq = lookyloo.indexing.ressources_number_domains(h)

@@ -961,26 +964,26 @@

@app.route('/categories', methods=['GET'])
def categories():
def categories() -> str:
    return render_template('categories.html', categories=lookyloo.indexing.categories)


@app.route('/rebuild_all')
@flask_login.login_required
def rebuild_all():
@flask_login.login_required  # type: ignore[misc]
def rebuild_all() -> WerkzeugResponse:
    lookyloo.rebuild_all()
    return redirect(url_for('index'))


@app.route('/rebuild_cache')
@flask_login.login_required
def rebuild_cache():
@flask_login.login_required  # type: ignore[misc]
def rebuild_cache() -> WerkzeugResponse:
    lookyloo.rebuild_cache()
    return redirect(url_for('index'))


@app.route('/search', methods=['GET', 'POST'])
def search():
def search() -> str | Response | WerkzeugResponse:
    if request.form.get('url'):
        quoted_url: str = quote_plus(request.form['url'])
        return redirect(url_for('url_details', url=quoted_url))

@@ -993,7 +996,7 @@ def search():
    return render_template('search.html')


def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None):
def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=None) -> str:
    return render_template('capture.html', user_agents=user_agents.user_agents,
                           default=user_agents.default,
                           personal_ua=user_ua,

@@ -1004,7 +1007,7 @@ def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[s

@app.route('/recapture/<string:tree_uuid>', methods=['GET'])
def recapture(tree_uuid: str):
def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse:
    cache = lookyloo.capture_cache(tree_uuid)
    if cache and hasattr(cache, 'url'):
        return _prepare_capture_template(user_ua=request.headers.get('User-Agent'),

@@ -1016,15 +1019,15 @@ def recapture(tree_uuid: str):
# ################## Submit existing capture ##################

@app.route('/submit_capture', methods=['GET', 'POST'])
def submit_capture():
def submit_capture() -> str | Response | WerkzeugResponse:

    if request.method == 'POST':
        listing = True if request.form.get('listing') else False
        uuid = str(uuid4())  # NOTE: new UUID, because we do not want duplicates
        har: Optional[Dict[str, Any]] = None
        html: Optional[str] = None
        last_redirected_url: Optional[str] = None
        screenshot: Optional[bytes] = None
        har: dict[str, Any] | None = None
        html: str | None = None
        last_redirected_url: str | None = None
        screenshot: bytes | None = None
        if 'har_file' in request.files and request.files['har_file']:
            har = json.loads(request.files['har_file'].stream.read())
            last_redirected_url = request.form.get('landing_page')

@@ -1038,7 +1041,7 @@ def submit_capture():
            return redirect(url_for('tree', tree_uuid=uuid))
        elif 'full_capture' in request.files and request.files['full_capture']:
            # it *only* accepts a lookyloo export.
            cookies: Optional[List[Dict[str, str]]] = None
            cookies: list[dict[str, str]] | None = None
            has_error = False
            with ZipFile(BytesIO(request.files['full_capture'].stream.read()), 'r') as lookyloo_capture:
                potential_favicons = set()

@@ -1084,7 +1087,7 @@ def submit_capture():
# #############################################################

@app.route('/capture', methods=['GET', 'POST'])
def capture_web():
def capture_web() -> str | Response | WerkzeugResponse:
    if flask_login.current_user.is_authenticated:
        user = flask_login.current_user.get_id()
    else:

@@ -1143,7 +1146,7 @@ def capture_web():
            parsed_proxy = urlparse(request.form['proxy'])
            if parsed_proxy.scheme and parsed_proxy.hostname and parsed_proxy.port:
                if parsed_proxy.scheme in ['http', 'https', 'socks5']:
                    if (parsed_proxy.username and parsed_proxy.password) != (not parsed_proxy.username and not parsed_proxy.password):
                    if (parsed_proxy.username and parsed_proxy.password) or (not parsed_proxy.username and not parsed_proxy.password):
                        capture_query['proxy'] = request.form['proxy']
                    else:
                        flash('You need to enter a username AND a password for your proxy.', 'error')

@ -1192,47 +1195,47 @@ def capture_web():
|
|||
|
||||
|
||||
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
|
||||
def cookies_name_detail(cookie_name: str):
|
||||
def cookies_name_detail(cookie_name: str) -> str:
|
||||
captures, domains = lookyloo.get_cookie_name_investigator(cookie_name.strip())
|
||||
return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures)
|
||||
|
||||
|
||||
@app.route('/hhhdetails/<string:hhh>', methods=['GET'])
|
||||
def hhh_detail(hhh: str):
|
||||
def hhh_detail(hhh: str) -> str:
|
||||
captures, headers = lookyloo.get_hhh_investigator(hhh.strip())
|
||||
return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers)
|
||||
|
||||
|
||||
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
||||
def body_hash_details(body_hash: str):
|
||||
def body_hash_details(body_hash: str) -> str:
|
||||
from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
|
||||
captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip())
|
||||
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup)
|
||||
|
||||
|
||||
@app.route('/urls/<string:url>', methods=['GET'])
|
||||
def url_details(url: str):
|
||||
def url_details(url: str) -> str:
|
||||
url = unquote_plus(url).strip()
|
||||
hits = lookyloo.get_url_occurrences(url, limit=50)
|
||||
return render_template('url.html', url=url, hits=hits)
|
||||
|
||||
|
||||
@app.route('/hostnames/<string:hostname>', methods=['GET'])
|
||||
def hostname_details(hostname: str):
|
||||
def hostname_details(hostname: str) -> str:
|
||||
hits = lookyloo.get_hostname_occurrences(hostname.strip(), with_urls_occurrences=True, limit=50)
|
||||
return render_template('hostname.html', hostname=hostname, hits=hits)
|
||||
|
||||
|
||||
@app.route('/stats', methods=['GET'])
|
||||
def statsfull():
|
||||
def statsfull() -> str:
|
||||
stats = lookyloo.get_stats()
|
||||
return render_template('stats.html', stats=stats)
|
||||
|
||||
|
||||
@app.route('/whois/<string:query>', methods=['GET'])
|
||||
@app.route('/whois/<string:query>/<int:email_only>', methods=['GET'])
|
||||
@file_response
|
||||
def whois(query: str, email_only: int=0):
|
||||
@file_response # type: ignore[misc]
|
||||
def whois(query: str, email_only: int=0) -> Response:
|
||||
to_return = lookyloo.uwhois.whois(query, bool(email_only))
|
||||
if isinstance(to_return, str):
|
||||
return send_file(BytesIO(to_return.encode()),
|
||||
|
@ -1243,35 +1246,35 @@ def whois(query: str, email_only: int=0):
|
|||
# ##### Methods related to a specific URLNode #####
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
|
||||
@file_response
|
||||
def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
|
||||
@file_response # type: ignore[misc]
|
||||
def urlnode_request_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
|
||||
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
||||
if not urlnode.request_cookie:
|
||||
return
|
||||
return None
|
||||
|
||||
return send_file(BytesIO(json.dumps(urlnode.request_cookie, indent=2).encode()),
|
||||
mimetype='text/plain', as_attachment=True, download_name='request_cookies.txt')
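
Two mechanical changes repeat across these URLNode routes. The bare return becomes an explicit return None so every branch visibly matches the new Response | None annotation, and the decorators gain # type: ignore[misc] because mypy treats a function wrapped by an untyped decorator (file_response is defined elsewhere in this app) as untyped. A hedged sketch of the shape, with a stand-in decorator rather than the real one:

from __future__ import annotations

from functools import wraps
from typing import Any, Callable

from flask import Response

def file_response(func: Callable[..., Any]) -> Callable[..., Any]:
    # Stand-in: the real decorator is not part of this diff.
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        return func(*args, **kwargs)
    return wrapper

@file_response  # a typed stub like this one would not need the ignore
def node_payload(payload: bytes | None) -> Response | None:
    if not payload:
        return None  # explicit, so the annotated return type holds in every branch
    return Response(payload)
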


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
@file_response
def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
@file_response # type: ignore[misc]
def urlnode_response_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.response_cookie:
return
return None

return send_file(BytesIO(json.dumps(urlnode.response_cookie, indent=2).encode()),
mimetype='text/plain', as_attachment=True, download_name='response_cookies.txt')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
@file_response
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
@file_response # type: ignore[misc]
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
# Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at some point
# we have multiple pages rendered on one tree, it will be a problem.
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not hasattr(urlnode, 'rendered_html') or not urlnode.rendered_html:
return
return None

ct = lookyloo.get_crawled_tree(tree_uuid)
not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
@@ -1283,22 +1286,22 @@ def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/rendered_content', methods=['GET'])
@file_response
def urlnode_rendered_content(tree_uuid: str, node_uuid: str):
@file_response # type: ignore[misc]
def urlnode_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.rendered_html:
return
return None
return send_file(BytesIO(urlnode.rendered_html.getvalue()), mimetype='text/plain',
as_attachment=True, download_name='rendered_content.txt')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
@file_response
def urlnode_post_request(tree_uuid: str, node_uuid: str):
@file_response # type: ignore[misc]
def urlnode_post_request(tree_uuid: str, node_uuid: str) -> Response | None:
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
if not urlnode.posted_data:
return
posted: Union[str, bytes]
return None
posted: str | bytes
if isinstance(urlnode.posted_data, (dict, list)):
# JSON blob, pretty print.
posted = json.dumps(urlnode.posted_data, indent=2)
@@ -1322,8 +1325,8 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
@file_response
def get_ressource(tree_uuid: str, node_uuid: str):
@file_response # type: ignore[misc]
def get_ressource(tree_uuid: str, node_uuid: str) -> Response:
if request.method == 'POST':
h_request = request.form.get('ressource_hash')
else:
@@ -1343,8 +1346,8 @@ def get_ressource(tree_uuid: str, node_uuid: str):

@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview/<string:h_ressource>', methods=['GET'])
@file_response
def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: Optional[str]=None):
@file_response # type: ignore[misc]
def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: str | None=None) -> Response:
ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_ressource)
if not ressource:
return Response('No preview available.', mimetype='text/text')
@@ -1356,16 +1359,16 @@ def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: Optional[


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/hashes', methods=['GET'])
@file_response
def hashes_urlnode(tree_uuid: str, node_uuid: str):
@file_response # type: ignore[misc]
def hashes_urlnode(tree_uuid: str, node_uuid: str) -> Response:
hashes = lookyloo.get_hashes(tree_uuid, urlnode_uuid=node_uuid)
return send_file(BytesIO('\n'.join(hashes).encode()),
mimetype='text/plain', as_attachment=True, download_name='hashes.txt')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/add_context', methods=['POST'])
@flask_login.login_required
def add_context(tree_uuid: str, node_uuid: str):
@flask_login.login_required # type: ignore[misc]
def add_context(tree_uuid: str, node_uuid: str) -> WerkzeugResponse | None:
if not enable_context_by_users:
return redirect(url_for('ressources'))

@@ -1375,7 +1378,7 @@ def add_context(tree_uuid: str, node_uuid: str):
callback_str: str = context_data['callback_str']
legitimate: bool = True if context_data.get('legitimate') else False
malicious: bool = True if context_data.get('malicious') else False
details: Dict[str, Dict] = {'malicious': {}, 'legitimate': {}}
details: dict[str, dict[str, Any]] = {'malicious': {}, 'legitimate': {}}
if malicious:
malicious_details = {}
if context_data.get('malicious_type'):
@@ -1396,6 +1399,7 @@ def add_context(tree_uuid: str, node_uuid: str):
return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
elif callback_str == 'ressources':
return redirect(url_for('ressources'))
return None


# Query API


@@ -1,20 +1,22 @@
#!/usr/bin/env python3

from __future__ import annotations

import base64
import hashlib
import json

from io import BytesIO
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Tuple, List
from zipfile import ZipFile

import flask_login # type: ignore
from flask import request, send_file
from flask import request, send_file, Response
from flask_restx import Namespace, Resource, abort, fields # type: ignore
from werkzeug.security import check_password_hash

from lacuscore import CaptureStatus as CaptureStatusCore
from pylacus import CaptureStatus as CaptureStatusPy
from pylacus import CaptureStatus as CaptureStatusPy # type: ignore[attr-defined]
from lookyloo.comparator import Comparator
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.lookyloo import CaptureSettings, Lookyloo
@@ -27,7 +29,7 @@ lookyloo: Lookyloo = get_lookyloo_instance()
comparator: Comparator = Comparator()


def api_auth_check(method):
def api_auth_check(method): # type: ignore
if flask_login.current_user.is_authenticated or load_user_from_request(request):
return method
abort(403, 'Authentication required.')
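
flask-restx applies method_decorators on every dispatch, so api_auth_check can run its check at wrap time: it returns the handler untouched for an authenticated caller and aborts with a 403 otherwise. Usage matches the admin resources further down in this diff; the resource below is illustrative, not part of the commit:

@api.route('/admin/example')
class ProtectedExample(Resource):  # illustrative; mirrors MISPPush/RebuildAll below
    method_decorators = [api_auth_check]

    def get(self) -> dict[str, str]:
        return {'status': 'only reachable with a valid session or API key'}
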
@@ -39,30 +41,30 @@ token_request_fields = api.model('AuthTokenFields', {
})


@api.errorhandler(NoValidHarFile)
def handle_no_HAR_file_exception(error):
@api.errorhandler(NoValidHarFile) # type: ignore[misc]
def handle_no_HAR_file_exception(error: Any) -> tuple[dict[str, str], int]:
'''The capture has no HAR file, it failed for some reason.'''
return {'message': str(error)}, 400


@api.route('/json/get_token')
@api.doc(description='Get the API token required for authenticated calls')
class AuthToken(Resource):
class AuthToken(Resource): # type: ignore[misc]

users_table = build_users_table()

@api.param('username', 'Your username')
@api.param('password', 'Your password')
def get(self):
username: Optional[str] = request.args['username'] if request.args.get('username') else None
password: Optional[str] = request.args['password'] if request.args.get('password') else None
@api.param('username', 'Your username') # type: ignore[misc]
@api.param('password', 'Your password') # type: ignore[misc]
def get(self) -> dict[str, str] | tuple[dict[str, str], int]:
username: str | None = request.args['username'] if request.args.get('username') else None
password: str | None = request.args['password'] if request.args.get('password') else None
if username and password and username in self.users_table and check_password_hash(self.users_table[username]['password'], password):
return {'authkey': self.users_table[username]['authkey']}
return {'error': 'User/Password invalid.'}, 401

@api.doc(body=token_request_fields)
def post(self):
auth: Dict = request.get_json(force=True)
@api.doc(body=token_request_fields) # type: ignore[misc]
def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
auth: dict[str, Any] = request.get_json(force=True)
if 'username' in auth and 'password' in auth: # Expected keys in json
if (auth['username'] in self.users_table
and check_password_hash(self.users_table[auth['username']]['password'], auth['password'])):
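
In practice a client fetches the token once and then sends it in the Authorization header, which load_user_from_request (in the helpers, at the end of this diff) picks up. A sketch with the requests library; the base URL and credentials are placeholders:

import requests

instance = 'https://127.0.0.1:5100'  # placeholder base URL

r = requests.get(f'{instance}/json/get_token',
                 params={'username': 'admin', 'password': 'secret'})
token = r.json()['authkey']

# Authenticated endpoints expect the key in the Authorization header:
r = requests.post(f'{instance}/admin/rebuild_all_cache',
                  headers={'Authorization': token})
print(r.json())
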
@@ -73,13 +75,13 @@ class AuthToken(Resource):
@api.route('/json/<string:capture_uuid>/status')
@api.doc(description='Get the status of a capture',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureStatusQuery(Resource):
class CaptureStatusQuery(Resource): # type: ignore[misc]

@api.param('with_error', 'Add the error message of the capture (if there is one)')
def get(self, capture_uuid: str):
@api.param('with_error', 'Add the error message of the capture (if there is one)') # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any]:
with_error: bool = True if request.args.get('with_error') else False
status_code = lookyloo.get_capture_status(capture_uuid)
to_return: Dict[str, Any] = {'status_code': status_code}
to_return: dict[str, Any] = {'status_code': status_code}
if status_code in [CaptureStatusCore.DONE, CaptureStatusPy.DONE] and with_error:
cache = lookyloo.capture_cache(capture_uuid)
if cache and cache.error:
@@ -90,40 +92,40 @@ class CaptureStatusQuery(Resource):
@api.route('/json/<string:capture_uuid>/hostnames')
@api.doc(description='Get all the hostnames of all the resources of a capture',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureHostnames(Resource):
def get(self, capture_uuid: str):
class CaptureHostnames(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
cache = lookyloo.capture_cache(capture_uuid)
if not cache:
return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
to_return: Dict[str, Any] = {'response': {'hostnames': list(lookyloo.get_hostnames(capture_uuid))}}
to_return: dict[str, Any] = {'response': {'hostnames': list(lookyloo.get_hostnames(capture_uuid))}}
return to_return


@api.route('/json/<string:capture_uuid>/urls')
@api.doc(description='Get all the URLs of all the resources of a capture',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureURLs(Resource):
def get(self, capture_uuid: str):
class CaptureURLs(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
cache = lookyloo.capture_cache(capture_uuid)
if not cache:
return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
to_return: Dict[str, Any] = {'response': {'urls': list(lookyloo.get_urls(capture_uuid))}}
to_return: dict[str, Any] = {'response': {'urls': list(lookyloo.get_urls(capture_uuid))}}
return to_return


@api.route('/json/<string:capture_uuid>/hashes')
@api.doc(description='Get all the hashes of all the resources of a capture',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureHashes(Resource):
class CaptureHashes(Resource): # type: ignore[misc]
# Note: shake algos require a length for the digest, discarding them.
supported_hash_algos = [algo for algo in hashlib.algorithms_available if not algo.startswith('shake')]
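
The comprehension above drops the shake algorithms because their digests take a mandatory output length, which the generic hexdigest() call used for every other algorithm does not pass:

import hashlib

print(hashlib.sha512(b'payload').hexdigest())       # fixed-size digest
print(hashlib.shake_256(b'payload').hexdigest(32))  # shake needs an explicit length
try:
    hashlib.shake_256(b'payload').hexdigest()       # type: ignore[call-arg]
except TypeError as e:
    print(e)  # complains about the missing 'length' argument
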

# NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
# so we return the SHA512 hashes by default

@api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
@api.param('hashes_only', default=1, description='If 1 (default), only returns a list of hashes instead of a dictionary of hashes with their respective URLs.')
def get(self, capture_uuid: str):
@api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}') # type: ignore[misc]
@api.param('hashes_only', default=1, description='If 1 (default), only returns a list of hashes instead of a dictionary of hashes with their respective URLs.') # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
cache = lookyloo.capture_cache(capture_uuid)
if not cache:
return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
@@ -131,7 +133,7 @@ class CaptureHashes(Resource):
algorithm = request.args['algorithm'].lower() if request.args.get('algorithm') else 'sha512'
hashes_only = False if 'hashes_only' in request.args and request.args['hashes_only'] in [0, '0'] else True
if algorithm == 'sha512' and hashes_only:
to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
to_return: dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
else:
hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
to_return = {'response': {'hashes': list(hashes.keys())}}
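
As the NOTE above says, only SHA512 comes pre-computed with the tree; any other algorithm means re-hashing each body on request. get_hashes_with_context is not shown here, but the per-body work presumably looks something like this sketch, not the actual implementation:

import hashlib

def hash_body(body: bytes, algorithm: str = 'sha512') -> str:
    # hashlib.new accepts any name from hashlib.algorithms_available
    h = hashlib.new(algorithm)
    h.update(body)
    return h.hexdigest()

print(hash_body(b'<html></html>', 'blake2b'))
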
@@ -143,13 +145,13 @@ class CaptureHashes(Resource):
@api.route('/json/<string:capture_uuid>/redirects')
@api.doc(description='Get all the redirects of a capture',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureRedirects(Resource):
def get(self, capture_uuid: str):
class CaptureRedirects(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
cache = lookyloo.capture_cache(capture_uuid)
if not cache:
return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400

to_return: Dict[str, Any] = {}
to_return: dict[str, Any] = {}
try:
to_return = {'response': {'url': cache.url,
'redirects': cache.redirects if cache.redirects else []}}
@@ -166,8 +168,8 @@ class CaptureRedirects(Resource):
@api.route('/json/<string:capture_uuid>/misp_export')
@api.doc(description='Get an export of the capture in MISP format',
params={'capture_uuid': 'The UUID of the capture'})
class MISPExport(Resource):
def get(self, capture_uuid: str):
class MISPExport(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any] | list[dict[str, Any]]:
with_parents = request.args.get('with_parents')
event = lookyloo.misp_export(capture_uuid, True if with_parents else False)
if isinstance(event, dict):
@@ -192,12 +194,12 @@ misp_push_fields = api.model('MISPPushFields', {
@api.doc(description='Push an event to a pre-configured MISP instance',
params={'capture_uuid': 'The UUID of the capture'},
security='apikey')
class MISPPush(Resource):
class MISPPush(Resource): # type: ignore[misc]
method_decorators = [api_auth_check]

@api.param('with_parents', 'Also push the parents of the capture (if any)')
@api.param('allow_duplicates', 'Push the event even if it is already present on the MISP instance')
def get(self, capture_uuid: str, instance_name: Optional[str]=None):
@api.param('with_parents', 'Also push the parents of the capture (if any)') # type: ignore[misc]
@api.param('allow_duplicates', 'Push the event even if it is already present on the MISP instance') # type: ignore[misc]
def get(self, capture_uuid: str, instance_name: str | None=None) -> dict[str, Any] | list[dict[str, Any]]:
with_parents = True if request.args.get('with_parents') else False
allow_duplicates = True if request.args.get('allow_duplicates') else False

@@ -208,7 +210,7 @@ class MISPPush(Resource):
else:
return {'error': f'MISP instance "{instance_name}" does not exist.'}

to_return: Dict = {}
to_return: dict[str, Any] = {}
if not misp.available:
to_return['error'] = 'MISP module not available.'
elif not misp.enable_push:
@@ -229,9 +231,9 @@ class MISPPush(Resource):

return to_return

@api.doc(body=misp_push_fields)
def post(self, capture_uuid: str, instance_name: Optional[str]=None):
parameters: Dict = request.get_json(force=True)
@api.doc(body=misp_push_fields) # type: ignore[misc]
def post(self, capture_uuid: str, instance_name: str | None=None) -> dict[str, Any] | list[dict[str, Any]]:
parameters: dict[str, Any] = request.get_json(force=True)
with_parents = True if parameters.get('with_parents') else False
allow_duplicates = True if parameters.get('allow_duplicates') else False
if instance_name is None:
@@ -241,7 +243,7 @@ class MISPPush(Resource):
else:
return {'error': f'MISP instance "{instance_name}" does not exist.'}

to_return: Dict = {}
to_return: dict[str, Any] = {}
if not misp.available:
to_return['error'] = 'MISP module not available.'
elif not misp.enable_push:
@@ -272,10 +274,10 @@ trigger_modules_fields = api.model('TriggerModulesFields', {
@api.route('/json/<string:capture_uuid>/trigger_modules')
@api.doc(description='Trigger all the available 3rd party modules on the given capture',
params={'capture_uuid': 'The UUID of the capture'})
class TriggerModules(Resource):
@api.doc(body=trigger_modules_fields)
def post(self, capture_uuid: str):
parameters: Dict = request.get_json(force=True)
class TriggerModules(Resource): # type: ignore[misc]
@api.doc(body=trigger_modules_fields) # type: ignore[misc]
def post(self, capture_uuid: str) -> dict[str, Any]:
parameters: dict[str, Any] = request.get_json(force=True)
force = True if parameters.get('force') else False
return lookyloo.trigger_modules(capture_uuid, force=force)

@@ -283,12 +285,12 @@ class TriggerModules(Resource):
@api.route('/json/hash_info/<h>')
@api.doc(description='Search for a ressource with a specific hash (sha512)',
params={'h': 'The hash (sha512)'})
class HashInfo(Resource):
def get(self, h: str):
class HashInfo(Resource): # type: ignore[misc]
def get(self, h: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
details, body = lookyloo.get_body_hash_full(h)
if not details:
return {'error': 'Unknown Hash.'}, 400
to_return: Dict[str, Any] = {'response': {'hash': h, 'details': details,
to_return: dict[str, Any] = {'response': {'hash': h, 'details': details,
'body': base64.b64encode(body.getvalue()).decode()}}
return to_return

@@ -302,11 +304,11 @@ url_info_fields = api.model('URLInfoFields', {

@api.route('/json/url_info')
@api.doc(description='Search for a URL')
class URLInfo(Resource):
class URLInfo(Resource): # type: ignore[misc]

@api.doc(body=url_info_fields)
def post(self):
to_query: Dict = request.get_json(force=True)
@api.doc(body=url_info_fields) # type: ignore[misc]
def post(self) -> list[dict[str, Any]]:
to_query: dict[str, Any] = request.get_json(force=True)
occurrences = lookyloo.get_url_occurrences(to_query.pop('url'), **to_query)
return occurrences

@@ -320,51 +322,50 @@ hostname_info_fields = api.model('HostnameInfoFields', {

@api.route('/json/hostname_info')
@api.doc(description='Search for a hostname')
class HostnameInfo(Resource):
class HostnameInfo(Resource): # type: ignore[misc]

@api.doc(body=hostname_info_fields)
def post(self):
to_query: Dict = request.get_json(force=True)
occurrences = lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query)
return occurrences
@api.doc(body=hostname_info_fields) # type: ignore[misc]
def post(self) -> list[dict[str, Any]]:
to_query: dict[str, Any] = request.get_json(force=True)
return lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query)


@api.route('/json/stats')
@api.doc(description='Get the statistics of the lookyloo instance.')
class InstanceStats(Resource):
def get(self):
class InstanceStats(Resource): # type: ignore[misc]
def get(self) -> dict[str, Any]:
return lookyloo.get_stats()


@api.route('/json/devices')
@api.doc(description='Get the list of devices pre-configured on the platform')
class Devices(Resource):
class Devices(Resource): # type: ignore[misc]

def get(self):
def get(self) -> dict[str, Any]:
return lookyloo.get_playwright_devices()


@api.route('/json/<string:capture_uuid>/stats')
@api.doc(description='Get the statistics of the capture.',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureStats(Resource):
def get(self, capture_uuid: str):
class CaptureStats(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any]:
return lookyloo.get_statistics(capture_uuid)


@api.route('/json/<string:capture_uuid>/info')
@api.doc(description='Get basic information about the capture.',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureInfo(Resource):
def get(self, capture_uuid: str):
class CaptureInfo(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any]:
return lookyloo.get_info(capture_uuid)


@api.route('/json/<string:capture_uuid>/cookies')
@api.doc(description='Get the complete cookie jar created during the capture.',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureCookies(Resource):
def get(self, capture_uuid: str):
class CaptureCookies(Resource): # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any]:
return json.loads(lookyloo.get_cookies(capture_uuid).read())


@@ -392,17 +393,17 @@ submit_fields_post = api.model('SubmitFieldsPost', {


@api.route('/submit')
class SubmitCapture(Resource):
class SubmitCapture(Resource): # type: ignore[misc]

@api.param('url', 'The URL to capture', required=True)
@api.param('listing', 'Display the capture on the index', default=1)
@api.param('user_agent', 'User agent to use for the capture')
@api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.')
@api.param('device_name', 'Use the pre-configured settings for this device')
@api.param('referer', 'Referer to pass to the capture')
@api.param('proxy', 'Proxy to use for the capture')
@api.produces(['text/text'])
def get(self):
@api.param('url', 'The URL to capture', required=True) # type: ignore[misc]
@api.param('listing', 'Display the capture on the index', default=1) # type: ignore[misc]
@api.param('user_agent', 'User agent to use for the capture') # type: ignore[misc]
@api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.') # type: ignore[misc]
@api.param('device_name', 'Use the pre-configured settings for this device') # type: ignore[misc]
@api.param('referer', 'Referer to pass to the capture') # type: ignore[misc]
@api.param('proxy', 'Proxy to use for the capture') # type: ignore[misc]
@api.produces(['text/text']) # type: ignore[misc]
def get(self) -> str | tuple[str, int]:
if flask_login.current_user.is_authenticated:
user = flask_login.current_user.get_id()
else:
@@ -430,9 +431,9 @@ class SubmitCapture(Resource):
perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
return perma_uuid
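
Both handlers end up returning the permanent UUID of the enqueued capture, so a client can submit and then poll the status route shown earlier. A sketch of the GET flavour with the requests library; the base URL is a placeholder:

import requests

instance = 'https://127.0.0.1:5100'  # placeholder base URL

r = requests.get(f'{instance}/submit',
                 params={'url': 'https://example.com', 'listing': 1})
uuid = r.text.strip('"\n')  # the UUID comes back as the response body

status = requests.get(f'{instance}/json/{uuid}/status').json()
print(status['status_code'])
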

@api.doc(body=submit_fields_post)
@api.produces(['text/text'])
def post(self):
@api.doc(body=submit_fields_post) # type: ignore[misc]
@api.produces(['text/text']) # type: ignore[misc]
def post(self) -> str:
if flask_login.current_user.is_authenticated:
user = flask_login.current_user.get_id()
else:
@@ -447,30 +448,30 @@ class SubmitCapture(Resource):
@api.route('/bin/<string:capture_uuid>/screenshot')
@api.doc(description='Get the screenshot associated to the capture.',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureScreenshot(Resource):
class CaptureScreenshot(Resource): # type: ignore[misc]

@api.produces(['image/png'])
def get(self, capture_uuid: str):
@api.produces(['image/png']) # type: ignore[misc]
def get(self, capture_uuid: str) -> Response:
return send_file(lookyloo.get_screenshot(capture_uuid), mimetype='image/png')


@api.route('/bin/<string:capture_uuid>/export')
@api.doc(description='Get all the files generated by the capture, except the pickle.',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureExport(Resource):
class CaptureExport(Resource): # type: ignore[misc]

@api.produces(['application/zip'])
def get(self, capture_uuid: str):
@api.produces(['application/zip']) # type: ignore[misc]
def get(self, capture_uuid: str) -> Response:
return send_file(lookyloo.get_capture(capture_uuid), mimetype='application/zip')


@api.route('/bin/<string:capture_uuid>/data')
@api.doc(description='Get the file downloaded by the capture.',
params={'capture_uuid': 'The UUID of the capture'})
class CaptureData(Resource):
class CaptureData(Resource): # type: ignore[misc]

@api.produces(['application/zip'])
def get(self, capture_uuid: str):
@api.produces(['application/zip']) # type: ignore[misc]
def get(self, capture_uuid: str) -> Response:
filename, data = lookyloo.get_data(capture_uuid)
if not filename:
# This capture didn't trigger a download.
@@ -499,10 +500,10 @@ compare_captures_fields = api.model('CompareCapturesFields', {

@api.route('/json/compare_captures')
@api.doc(description='Compare two captures')
class CompareCaptures(Resource):
@api.doc(body=compare_captures_fields)
def post(self):
parameters: Dict = request.get_json(force=True)
class CompareCaptures(Resource): # type: ignore[misc]
@api.doc(body=compare_captures_fields) # type: ignore[misc]
def post(self) -> dict[str, Any]:
parameters: dict[str, Any] = request.get_json(force=True)
left_uuid = parameters.get('capture_left')
right_uuid = parameters.get('capture_right')
if not left_uuid or not right_uuid:
@@ -545,10 +546,10 @@ comparables_model = api.model('ComparablesModel', {

@api.route('/json/<string:capture_uuid>/comparables')
@api.doc(description='Get the data we can compare across captures')
class Comparables(Resource):
class Comparables(Resource): # type: ignore[misc]

@api.marshal_with(comparables_model)
def get(self, capture_uuid: str):
@api.marshal_with(comparables_model) # type: ignore[misc]
def get(self, capture_uuid: str) -> dict[str, Any]:
return comparator.get_comparables_capture(capture_uuid)


@@ -561,10 +562,10 @@ takedown_fields = api.model('TakedownFields', {

@api.route('/json/takedown')
@api.doc(description='Get information for triggering a takedown request')
class Takedown(Resource):
@api.doc(body=takedown_fields)
def post(self):
parameters: Dict = request.get_json(force=True)
class Takedown(Resource): # type: ignore[misc]
@api.doc(body=takedown_fields) # type: ignore[misc]
def post(self) -> list[dict[str, Any]] | dict[str, str]:
parameters: dict[str, Any] = request.get_json(force=True)
capture_uuid = parameters.get('capture_uuid')
if not capture_uuid:
return {'error': f'Invalid request: {parameters}'}
@@ -576,10 +577,10 @@ class Takedown(Resource):
@api.route('/admin/rebuild_all')
@api.doc(description='Rebuild all the trees. WARNING: IT IS GOING TO TAKE A VERY LONG TIME.',
security='apikey')
class RebuildAll(Resource):
class RebuildAll(Resource): # type: ignore[misc]
method_decorators = [api_auth_check]

def post(self):
def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
try:
lookyloo.rebuild_all()
except Exception as e:
@@ -591,10 +592,10 @@ class RebuildAll(Resource):
@api.route('/admin/rebuild_all_cache')
@api.doc(description='Rebuild all the caches. It will take a while, but less than rebuild all.',
security='apikey')
class RebuildAllCache(Resource):
class RebuildAllCache(Resource): # type: ignore[misc]
method_decorators = [api_auth_check]

def post(self):
def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
try:
lookyloo.rebuild_cache()
except Exception as e:
@@ -607,10 +608,10 @@ class RebuildAllCache(Resource):
@api.doc(description='Rebuild the tree.',
params={'capture_uuid': 'The UUID of the capture'},
security='apikey')
class CaptureRebuildTree(Resource):
class CaptureRebuildTree(Resource): # type: ignore[misc]
method_decorators = [api_auth_check]

def post(self, capture_uuid):
def post(self, capture_uuid: str) -> dict[str, str] | tuple[dict[str, str], int]:
try:
lookyloo.remove_pickle(capture_uuid)
lookyloo.get_crawled_tree(capture_uuid)
@@ -624,10 +625,10 @@ class CaptureRebuildTree(Resource):
@api.doc(description='Hide the capture from the index.',
params={'capture_uuid': 'The UUID of the capture'},
security='apikey')
class CaptureHide(Resource):
class CaptureHide(Resource): # type: ignore[misc]
method_decorators = [api_auth_check]

def post(self, capture_uuid):
def post(self, capture_uuid: str) -> dict[str, str] | tuple[dict[str, str], int]:
try:
lookyloo.hide_capture(capture_uuid)
except Exception as e:

@@ -8,6 +8,7 @@ from pathlib import Path
from typing import Dict, List, Union

import flask_login # type: ignore
from flask import Request
from werkzeug.security import generate_password_hash

from lookyloo.default import get_config, get_homedir
@@ -23,7 +24,7 @@ def get_lookyloo_instance() -> Lookyloo:
return __global_lookyloo_instance


def src_request_ip(request) -> str:
def src_request_ip(request: Request) -> str | None:
# NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers.
real_ip = request.headers.get('X-Real-IP')
if not real_ip:
@@ -31,11 +32,11 @@ def src_request_ip(request) -> str:
return real_ip
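
The hunk hides the middle of the function, but its shape is clear from the context lines: trust the reverse proxy's X-Real-IP header when present, otherwise fall back to the address of the direct peer. A sketch under that assumption; the elided branch may differ:

from __future__ import annotations

from flask import Request

def src_request_ip_sketch(request: Request) -> str | None:
    real_ip = request.headers.get('X-Real-IP')
    if not real_ip:
        real_ip = request.remote_addr  # assumption: direct peer address
    return real_ip
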


class User(flask_login.UserMixin):
class User(flask_login.UserMixin): # type: ignore[misc]
pass


def load_user_from_request(request):
def load_user_from_request(request: Request) -> User | None:
api_key = request.headers.get('Authorization')
if not api_key:
return None
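
The rest of load_user_from_request falls outside the hunk, but the new User | None annotation tells the story: no Authorization header or an unknown key yields None, a known key yields a User. A sketch under that assumption; the key lookup is a guess, stubbed with a placeholder set:

from __future__ import annotations

import flask_login  # type: ignore
from flask import Request

KNOWN_KEYS = {'example-authkey'}  # placeholder for the configured users table

class UserSketch(flask_login.UserMixin):  # type: ignore[misc]
    pass

def load_user_from_request_sketch(request: Request) -> UserSketch | None:
    api_key = request.headers.get('Authorization')
    if not api_key:
        return None
    # Assumption: the real code matches the key against the users table.
    return UserSketch() if api_key.strip() in KNOWN_KEYS else None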