mirror of https://github.com/CIRCL/lookyloo

commit ee1ad48b25 (parent 0b5128e5b4)
chg: Use new annotations
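The commit does two related things: it bumps the pre-commit hooks, and it moves every annotation in the bin/ daemons and the lookyloo package to the modern syntax. Each touched module gains `from __future__ import annotations` (PEP 563), which makes all annotations lazily evaluated strings, so the PEP 604 unions (`X | Y` instead of `Optional[X]`/`Union[X, Y]`) and the PEP 585 builtin generics (`dict[str, str]` instead of `Dict[str, str]`) are safe even on Python 3.8, the floor implied by the `--py38-plus` flag below. A minimal sketch of the pattern applied throughout (the function body is illustrative, not the project's code):

    from __future__ import annotations  # annotations become lazily-evaluated strings (PEP 563)

    from pathlib import Path

    # Old style                     New style used in this commit
    #   Optional[Path]        ->      Path | None        (PEP 604)
    #   Union[str, int]       ->      str | int          (PEP 604)
    #   Dict[str, str]        ->      dict[str, str]     (PEP 585)

    def update_index(root_dir: Path, *, s3fs_parent_dir: str | None = None) -> Path | None:
        # Illustrative body only; the real logic lives in the archiver below.
        index_file = root_dir / 'index'
        return index_file if index_file.exists() else None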
@@ -3,14 +3,14 @@
 exclude: "user_agents|website/web/sri.txt"
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
   - repo: https://github.com/asottile/pyupgrade
-    rev: v2.31.1
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
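The pyupgrade bump (v2.31.1 to v3.15.0) matters beyond housekeeping: with `--py38-plus`, recent pyupgrade applies its PEP 585/604 rewrites to any file that imports `from __future__ import annotations`, which every module below now does, so `pre-commit run --all-files` keeps enforcing the style adopted here. Roughly, the hook turns this:

    # Before the hook runs:
    from typing import Dict, List, Optional

    def load(path: str) -> Optional[Dict[str, List[int]]]: ...

    # After, in a file that has `from __future__ import annotations`:
    def load(path: str) -> dict[str, list[int]] | None: ...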
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import csv
 import gzip
 import logging
@@ -23,7 +25,7 @@ logging.config.dictConfig(get_config('logging'))
 
 class Archiver(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'archiver'
         self.redis = Redis(unix_socket_path=get_socket_path('cache'))
@@ -54,7 +56,7 @@ class Archiver(AbstractManager):
             self.s3fs_bucket = s3fs_config['config']['bucket_name']
             self.s3fs_client.clear_multipart_uploads(self.s3fs_bucket)
 
-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
         archiving_done = False
         # NOTE: When we archive a big directory, moving *a lot* of files, expecially to MinIO
         # can take a very long time. In order to avoid being stuck on the archiving, we break that in chunks
@@ -71,14 +73,14 @@ class Archiver(AbstractManager):
             # This call takes a very long time on MinIO
             self._update_all_capture_indexes()
 
-    def _update_index(self, root_dir: Path, *, s3fs_parent_dir: Optional[str]=None) -> Optional[Path]:
+    def _update_index(self, root_dir: Path, *, s3fs_parent_dir: str | None=None) -> Path | None:
         # returns a path to the index for the given directory
         logmsg = f'Updating index for {root_dir}'
         if s3fs_parent_dir:
             logmsg = f'{logmsg} (s3fs)'
         self.logger.info(logmsg)
 
-        current_index: Dict[str, str] = {}
+        current_index: dict[str, str] = {}
         index_file = root_dir / 'index'
         if index_file.exists():
             try:
@@ -91,11 +93,11 @@ class Archiver(AbstractManager):
                 # NOTE: should we remove if it has subs?
                 index_file.unlink()
 
-        sub_indexes: List[Path] = []
-        current_index_dirs: Set[str] = set(current_index.values())
-        new_captures: Set[Path] = set()
+        sub_indexes: list[Path] = []
+        current_index_dirs: set[str] = set(current_index.values())
+        new_captures: set[Path] = set()
         # Directories that are actually in the listing.
-        current_dirs: Set[str] = set()
+        current_dirs: set[str] = set()
 
         if s3fs_parent_dir:
             s3fs_dir = '/'.join([s3fs_parent_dir, root_dir.name])
@@ -212,7 +214,7 @@ class Archiver(AbstractManager):
 
         return index_file
 
-    def _update_all_capture_indexes(self, *, recent_only: bool=False):
+    def _update_all_capture_indexes(self, *, recent_only: bool=False) -> None:
         '''Run that after the captures are in the proper directories'''
         # Recent captures
         self.logger.info('Update recent indexes')
@@ -278,7 +280,7 @@ class Archiver(AbstractManager):
 
         return dest_dir / capture_path.name
 
-    def _archive(self):
+    def _archive(self) -> bool:
         archive_interval = timedelta(days=get_config('generic', 'archive'))
         cut_time = (datetime.now() - archive_interval)
         self.logger.info(f'Archiving all captures older than {cut_time.isoformat()}.')
@@ -340,7 +342,7 @@ class Archiver(AbstractManager):
         self.logger.info('Archiving done.')
         return archiving_done
 
-    def __load_index(self, index_path: Path, ignore_sub: bool=False) -> Dict[str, str]:
+    def __load_index(self, index_path: Path, ignore_sub: bool=False) -> dict[str, str]:
         '''Loads the given index file and all the subsequent ones if they exist'''
         # NOTE: this method is used on recent and archived captures, it must never trigger a dir listing
         indexed_captures = {}
@@ -359,7 +361,7 @@ class Archiver(AbstractManager):
                     indexed_captures[key] = str(index_path.parent / path_name)
         return indexed_captures
 
-    def _load_indexes(self):
+    def _load_indexes(self) -> None:
         # capture_dir / Year / Month / index <- should always exists. If not, created by _update_index
         # Initialize recent index
         for index in sorted(get_captures_dir().glob('*/*/index'), reverse=True):
@@ -391,7 +393,7 @@ class Archiver(AbstractManager):
         self.logger.info(f'Archived indexes loaded: {total_archived_captures} entries.')
 
 
-def main():
+def main() -> None:
     a = Archiver()
     a.run(sleep_in_sec=3600)
 
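The archiver hunks (this is bin/archiver.py, judging by `script_name = 'archiver'`) are mechanical rewrites, with one semantic anchor worth noting: `__load_index` now advertises `dict[str, str]`, a mapping from capture UUID to capture directory. A hypothetical reader for the two-column CSV index implied by `import csv` and `indexed_captures[key] = str(index_path.parent / path_name)`; the layout is an assumption, not the project's exact code:

    import csv
    from pathlib import Path


    def load_index(index_path: Path) -> dict[str, str]:
        # Assumed "uuid,dirname" CSV layout; entries resolve relative to the
        # directory holding the index file, as in the hunk above.
        indexed_captures: dict[str, str] = {}
        with index_path.open() as f:
            for uuid, dirname in csv.reader(f):
                indexed_captures[uuid] = str(index_path.parent / dirname)
        return indexed_captures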
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import asyncio
 import json
 import logging
@@ -10,7 +12,7 @@ from pathlib import Path
 from typing import Optional, Set, Union
 
 from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
-from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
+from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy  # type: ignore[attr-defined]
 
 from lookyloo.lookyloo import Lookyloo, CaptureSettings
 from lookyloo.default import AbstractManager, get_config
@@ -23,7 +25,7 @@ logging.config.dictConfig(get_config('logging'))
 
 class AsyncCapture(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'async_capture'
         self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
@@ -31,7 +33,7 @@ class AsyncCapture(AbstractManager):
         self.lookyloo = Lookyloo()
 
         if isinstance(self.lookyloo.lacus, LacusCore):
-            self.captures: Set[asyncio.Task] = set()
+            self.captures: set[asyncio.Task] = set()  # type: ignore[type-arg]
 
         self.fox = FOX(config_name='FOX')
         if not self.fox.available:
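This file is bin/async_capture.py, per its `script_name`. Note that `set[asyncio.Task]` parses fine under the future import, but a strict mypy configuration (`disallow_any_generics`) still asks for a type parameter on the generic `Task`, hence the `# type: ignore[type-arg]`; the same reasoning explains the ignores on the bare `Popen`, `Redis`, `Mapping` and `LoggerAdapter` annotations later in this diff. The fully parametrized alternative would look like this (a sketch, with `None` assuming capture tasks return nothing):

    from __future__ import annotations

    import asyncio

    # What the ignore comment papers over: Task is generic in its result type.
    captures: set[asyncio.Task[None]] = set()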
@@ -41,23 +43,24 @@ class AsyncCapture(AbstractManager):
         if self.fox.available:
             self.fox.capture_default_trigger(url, auto_trigger=True)
 
-    async def _trigger_captures(self):
+    async def _trigger_captures(self) -> None:
+        # Only called if LacusCore is used
         max_new_captures = get_config('generic', 'async_capture_processes') - len(self.captures)
         self.logger.debug(f'{len(self.captures)} ongoing captures.')
         if max_new_captures <= 0:
             self.logger.info(f'Max amount of captures in parallel reached ({len(self.captures)})')
-            return
-        for capture_task in self.lookyloo.lacus.consume_queue(max_new_captures):
+            return None
+        for capture_task in self.lookyloo.lacus.consume_queue(max_new_captures):  # type: ignore[union-attr]
             self.captures.add(capture_task)
             capture_task.add_done_callback(self.captures.discard)
 
-    def uuids_ready(self):
+    def uuids_ready(self) -> list[str]:
         return [uuid for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf')
                 if uuid and self.lookyloo.lacus.get_capture_status(uuid) in [CaptureStatusPy.DONE, CaptureStatusCore.DONE]]
 
     def process_capture_queue(self) -> None:
         '''Process a query from the capture queue'''
-        entries: Union[CaptureResponseCore, CaptureResponsePy]
+        entries: CaptureResponseCore | CaptureResponsePy
         for uuid in self.uuids_ready():
             if isinstance(self.lookyloo.lacus, LacusCore):
                 entries = self.lookyloo.lacus.get_capture(uuid, decode=True)
@@ -71,9 +74,9 @@ class AsyncCapture(AbstractManager):
             self.logger.info(log)
 
             self.lookyloo.redis.sadd('ongoing', uuid)
-            queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
+            queue: str | None = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
 
-            to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)
+            to_capture: CaptureSettings = self.lookyloo.redis.hgetall(uuid)  # type: ignore[assignment]
 
             if get_config('generic', 'default_public'):
                 # By default, the captures are on the index, unless the user mark them as un-listed
@@ -123,9 +126,9 @@ class AsyncCapture(AbstractManager):
         self.unset_running()
         self.logger.info(f'Done with {uuid}')
 
-    async def _to_run_forever_async(self):
+    async def _to_run_forever_async(self) -> None:
         if self.force_stop:
-            return
+            return None
 
         if isinstance(self.lookyloo.lacus, LacusCore):
             await self._trigger_captures()
@@ -135,7 +138,7 @@ class AsyncCapture(AbstractManager):
 
         self.process_capture_queue()
 
-    async def _wait_to_finish_async(self):
+    async def _wait_to_finish_async(self) -> None:
         if isinstance(self.lookyloo.lacus, LacusCore):
             while self.captures:
                 self.logger.info(f'Waiting for {len(self.captures)} capture(s) to finish...')
@@ -147,7 +150,7 @@ class AsyncCapture(AbstractManager):
             self.logger.info('No more captures')
 
 
-def main():
+def main() -> None:
     m = AsyncCapture()
 
     loop = asyncio.new_event_loop()
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import logging
 import logging.config
 import os
@@ -20,7 +22,7 @@ logging.config.dictConfig(get_config('logging'))
 
 class BackgroundIndexer(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
         super().__init__(loglevel)
         self.lookyloo = Lookyloo()
         self.script_name = 'background_indexer'
@@ -28,7 +30,7 @@ class BackgroundIndexer(AbstractManager):
         self.discarded_captures_dir = self.lookyloo.capture_dir.parent / 'discarded_captures'
         self.discarded_captures_dir.mkdir(parents=True, exist_ok=True)
 
-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
         all_done = self._build_missing_pickles()
         if all_done:
             self._check_indexes()
@@ -72,7 +74,7 @@ class BackgroundIndexer(AbstractManager):
                     # The capture with this UUID exists, but it is for some reason missing in lookup_dirs
                     self.lookyloo.redis.hset('lookup_dirs', uuid, str(path))
                 else:
-                    cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid))
+                    cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid))  # type: ignore[arg-type]
                     if cached_path != path:
                         # we have a duplicate UUID, it is proably related to some bad copy/paste
                         if cached_path.exists():
@@ -118,13 +120,13 @@ class BackgroundIndexer(AbstractManager):
             return True
         return False
 
-    def _check_indexes(self):
+    def _check_indexes(self) -> None:
         index_redis = self.lookyloo.indexing.redis
         can_index = index_redis.set('ongoing_indexing', 1, ex=3600, nx=True)
         if not can_index:
             # There is no reason to run this method in multiple scripts.
             self.logger.info('Indexing already ongoing in another process.')
-            return
+            return None
         self.logger.info('Check indexes...')
         for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False):
             if self.lookyloo.is_public_instance and cache.no_index:
@@ -163,7 +165,7 @@ class BackgroundIndexer(AbstractManager):
         self.logger.info('... done.')
 
 
-def main():
+def main() -> None:
     i = BackgroundIndexer()
     i.run(sleep_in_sec=60)
 
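One non-annotation detail in this file (bin/background_indexer.py, per its `script_name`) deserves a gloss: `_check_indexes` guards itself with `index_redis.set('ongoing_indexing', 1, ex=3600, nx=True)`, the classic Redis test-and-set lock. Reduced to a self-contained sketch (the key name comes from the hunk, the rest is assumed):

    from redis import Redis


    def try_acquire_indexing_lock(redis: Redis, ttl: int = 3600) -> bool:
        # nx=True sets the key only if it does not exist yet (atomic),
        # ex=ttl auto-expires it so a crashed indexer cannot hold the
        # lock forever; returns True when this process won the lock.
        return bool(redis.set('ongoing_indexing', 1, ex=ttl, nx=True))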
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import json
 import time
 import logging
@@ -8,7 +10,7 @@ from collections import Counter
 from datetime import date, timedelta
 from typing import Any, Dict, Optional
 
-from lookyloo.lookyloo import Lookyloo, CaptureStatusCore, CaptureStatusPy
+from lookyloo.lookyloo import Lookyloo, CaptureStatusCore, CaptureStatusPy  # type: ignore[attr-defined]
 from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
 from lookyloo.helpers import ParsedUserAgent, serialize_to_json
 
@@ -17,19 +19,19 @@ logging.config.dictConfig(get_config('logging'))
 
 class Processing(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
         super().__init__(loglevel)
         self.script_name = 'processing'
         self.lookyloo = Lookyloo()
 
         self.use_own_ua = get_config('generic', 'use_user_agents_users')
 
-    def _to_run_forever(self):
+    def _to_run_forever(self) -> None:
         if self.use_own_ua:
             self._build_ua_file()
         self._retry_failed_enqueue()
 
-    def _build_ua_file(self):
+    def _build_ua_file(self) -> None:
         '''Build a file in a format compatible with the capture page'''
         yesterday = (date.today() - timedelta(days=1))
         self_generated_ua_file_path = get_homedir() / 'own_user_agents' / str(yesterday.year) / f'{yesterday.month:02}'
@@ -44,7 +46,7 @@ class Processing(AbstractManager):
             self.logger.info(f'No User-agent file for {yesterday} to generate.')
             return
 
-        to_store: Dict[str, Any] = {'by_frequency': []}
+        to_store: dict[str, Any] = {'by_frequency': []}
         uas = Counter([entry.split('|', 1)[1] for entry in entries])
         for ua, _ in uas.most_common():
             parsed_ua = ParsedUserAgent(ua)
@@ -71,7 +73,7 @@ class Processing(AbstractManager):
         self.lookyloo.redis.delete(f'user_agents|{yesterday.isoformat()}')
         self.logger.info(f'User-agent file for {yesterday} generated.')
 
-    def _retry_failed_enqueue(self):
+    def _retry_failed_enqueue(self) -> None:
         '''If enqueuing failed, the settings are added, with a UUID in the 'to_capture key', and they have a UUID'''
         for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
             try_reenqueue = False
@@ -131,7 +133,7 @@ class Processing(AbstractManager):
             self.logger.info(f'{uuid} enqueued.')
 
 
-def main():
+def main() -> None:
     p = Processing()
     p.run(sleep_in_sec=30)
 
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import argparse
 import os
 import time
@@ -24,14 +26,14 @@ def check_running(name: str) -> bool:
         return False
 
 
-def launch_cache(storage_directory: Optional[Path]=None):
+def launch_cache(storage_directory: Path | None=None) -> None:
    if not storage_directory:
        storage_directory = get_homedir()
    if not check_running('cache'):
        Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache'))


-def shutdown_cache(storage_directory: Optional[Path]=None):
+def shutdown_cache(storage_directory: Path | None=None) -> None:
    if not storage_directory:
        storage_directory = get_homedir()
    r = Redis(unix_socket_path=get_socket_path('cache'))
@@ -39,14 +41,14 @@ def shutdown_cache(storage_directory: Optional[Path]=None):
     print('Redis cache database shutdown.')
 
 
-def launch_indexing(storage_directory: Optional[Path]=None):
+def launch_indexing(storage_directory: Path | None=None) -> None:
     if not storage_directory:
         storage_directory = get_homedir()
     if not check_running('indexing'):
         Popen(["./run_redis.sh"], cwd=(storage_directory / 'indexing'))
 
 
-def shutdown_indexing(storage_directory: Optional[Path]=None):
+def shutdown_indexing(storage_directory: Path | None=None) -> None:
     if not storage_directory:
         storage_directory = get_homedir()
     r = Redis(unix_socket_path=get_socket_path('indexing'))
@@ -54,13 +56,13 @@ def shutdown_indexing(storage_directory: Optional[Path]=None):
     print('Redis indexing database shutdown.')
 
 
-def launch_all():
+def launch_all() -> None:
     launch_cache()
     launch_indexing()
 
 
-def check_all(stop: bool=False):
-    backends: Dict[str, bool] = {'cache': False, 'indexing': False}
+def check_all(stop: bool=False) -> None:
+    backends: dict[str, bool] = {'cache': False, 'indexing': False}
     while True:
         for db_name in backends.keys():
             try:
@@ -81,12 +83,12 @@ def check_all(stop: bool=False):
         time.sleep(1)
 
 
-def stop_all():
+def stop_all() -> None:
     shutdown_cache()
     shutdown_indexing()
 
 
-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(description='Manage backend DBs.')
     parser.add_argument("--start", action='store_true', default=False, help="Start all")
     parser.add_argument("--stop", action='store_true', default=False, help="Stop all")
@@ -5,7 +5,7 @@ import time
 from lookyloo.default import AbstractManager
 
 
-def main():
+def main() -> None:
     AbstractManager.force_shutdown()
     time.sleep(5)
     while True:
@@ -5,7 +5,7 @@ from subprocess import Popen, run
 from lookyloo.default import get_homedir
 
 
-def main():
+def main() -> None:
     # Just fail if the env isn't set.
     get_homedir()
     print('Start backend (redis)...')
@@ -13,13 +13,13 @@ logging.config.dictConfig(get_config('logging'))
 
 class Website(AbstractManager):
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: Optional[int]=None) -> None:
         super().__init__(loglevel)
         self.script_name = 'website'
-        self.process = self._launch_website()
+        self.process: Popen = self._launch_website()  # type: ignore[type-arg]
         self.set_running()
 
-    def _launch_website(self):
+    def _launch_website(self) -> Popen:  # type: ignore[type-arg]
         website_dir = get_homedir() / 'website'
         ip = get_config('generic', 'website_listen_ip')
         port = get_config('generic', 'website_listen_port')
@@ -32,7 +32,7 @@ class Website(AbstractManager):
                      cwd=website_dir)
 
 
-def main():
+def main() -> None:
     w = Website()
     w.run(sleep_in_sec=10)
 
@@ -8,7 +8,7 @@ from redis.exceptions import ConnectionError
 from lookyloo.default import get_homedir, get_socket_path
 
 
-def main():
+def main() -> None:
     get_homedir()
     p = Popen(['shutdown'])
     p.wait()
@@ -15,14 +15,14 @@ from lookyloo.default import get_homedir, get_config
 logging.config.dictConfig(get_config('logging'))
 
 
-def compute_hash_self():
+def compute_hash_self() -> bytes:
     m = hashlib.sha256()
     with (get_homedir() / 'bin' / 'update.py').open('rb') as f:
         m.update(f.read())
         return m.digest()
 
 
-def keep_going(ignore=False):
+def keep_going(ignore: bool=False) -> None:
     if ignore:
         return
     keep_going = input('Continue? (y/N) ')
@@ -31,7 +31,7 @@ def keep_going(ignore=False):
         sys.exit()
 
 
-def run_command(command, expect_fail: bool=False, capture_output: bool=True):
+def run_command(command: str, expect_fail: bool=False, capture_output: bool=True) -> None:
     args = shlex.split(command)
     homedir = get_homedir()
     process = subprocess.run(args, cwd=homedir, capture_output=capture_output)
@@ -42,7 +42,7 @@ def run_command(command, expect_fail: bool=False, capture_output: bool=True):
         sys.exit()
 
 
-def check_poetry_version():
+def check_poetry_version() -> None:
     args = shlex.split("poetry self -V")
     homedir = get_homedir()
     process = subprocess.run(args, cwd=homedir, capture_output=True)
@@ -58,7 +58,7 @@ def check_poetry_version():
         sys.exit()
 
 
-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.')
     parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.')
     args = parser.parse_args()
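In bin/update.py (the file names itself: it opens `bin/update.py`), `compute_hash_self` now declares `-> bytes`: it SHA-256-hashes the updater's own source, presumably so the script can notice after a pull that update.py itself changed; the comparison logic sits outside this diff. An equivalent self-contained sketch:

    import hashlib
    from pathlib import Path


    def compute_hash_self() -> bytes:
        # Digest of this very script; comparing it before and after the
        # pull tells the caller whether the updater must be re-executed
        # (assumed usage, inferred from the function's name).
        return hashlib.sha256(Path(__file__).read_bytes()).digest()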
@@ -1,3 +1,8 @@
 import logging
 
+from .lookyloo import Lookyloo  # noqa
+from .indexing import Indexing  # noqa
+
 logging.getLogger(__name__).addHandler(logging.NullHandler())
+
+__all__ = ['Lookyloo', 'Indexing']
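With lookyloo/__init__.py now re-exporting the two public entry points and declaring them in `__all__`, the flat import works and wildcard imports stay bounded:

    # Assuming an installed lookyloo checkout:
    from lookyloo import Lookyloo, Indexing

    lookyloo = Lookyloo()    # main interface to the captures
    indexing = Indexing()    # the capture indexes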
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import contextlib
 import gzip
 import json
@@ -13,15 +15,15 @@ import time
 
 from collections.abc import Mapping
 from datetime import datetime
-from functools import lru_cache
+from functools import lru_cache, _CacheInfo as CacheInfo
 from logging import Logger, LoggerAdapter
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, Set, MutableMapping
+from typing import Any, Dict, List, Optional, Tuple, Union, Set, MutableMapping, Iterator
 
 import dns.rdatatype
 import dns.resolver
-from har2tree import CrawledTree, Har2TreeError, HarFile
-from pyipasnhistory import IPASNHistory
+from har2tree import CrawledTree, Har2TreeError, HarFile  # type: ignore[attr-defined]
+from pyipasnhistory import IPASNHistory  # type: ignore[attr-defined]
 from redis import Redis
 
 from .context import Context
@@ -32,11 +34,11 @@ from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, TreeNeedsRebuild
 from .modules import Cloudflare
 
 
-class LookylooCacheLogAdapter(LoggerAdapter):
+class LookylooCacheLogAdapter(LoggerAdapter):  # type: ignore[type-arg]
     """
     Prepend log entry with the UUID of the capture
     """
-    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> Tuple[str, MutableMapping[str, Any]]:
+    def process(self, msg: str, kwargs: MutableMapping[str, Any]) -> tuple[str, MutableMapping[str, Any]]:
         if self.extra:
             return '[{}] {}'.format(self.extra['uuid'], msg), kwargs
         return msg, kwargs
@@ -47,10 +49,10 @@ class CaptureCache():
              'error', 'no_index', 'categories', 'parent',
              'user_agent', 'referer', 'logger')
 
-    def __init__(self, cache_entry: Dict[str, Any]):
+    def __init__(self, cache_entry: dict[str, Any]):
         logger = logging.getLogger(f'{self.__class__.__name__}')
         logger.setLevel(get_config('generic', 'loglevel'))
-        __default_cache_keys: Tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
+        __default_cache_keys: tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp',
                                                                      'url', 'redirects', 'capture_dir')
         if 'uuid' not in cache_entry or 'capture_dir' not in cache_entry:
             raise LookylooException(f'The capture is deeply broken: {cache_entry}')
@@ -80,16 +82,16 @@ class CaptureCache():
                 # If the microsecond is missing (0), it fails
                 self.timestamp = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S%z')
 
-        self.redirects: List[str] = json.loads(cache_entry['redirects']) if cache_entry.get('redirects') else []
+        self.redirects: list[str] = json.loads(cache_entry['redirects']) if cache_entry.get('redirects') else []
 
         # Error without all the keys in __default_cache_keys was fatal.
         # if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
-        self.error: Optional[str] = cache_entry.get('error')
+        self.error: str | None = cache_entry.get('error')
         self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
-        self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
-        self.parent: Optional[str] = cache_entry.get('parent')
-        self.user_agent: Optional[str] = cache_entry.get('user_agent')
-        self.referer: Optional[str] = cache_entry.get('referer')
+        self.categories: list[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
+        self.parent: str | None = cache_entry.get('parent')
+        self.user_agent: str | None = cache_entry.get('user_agent')
+        self.referer: str | None = cache_entry.get('referer')
 
     @property
     def tree(self) -> CrawledTree:
@@ -142,26 +144,26 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> CrawledTree
     raise NoValidHarFile("Couldn't find HAR files")
 
 
-def serialize_sets(obj):
+def serialize_sets(obj: Any) -> Any:
     if isinstance(obj, set):
         return list(obj)
 
     return obj
 
 
-class CapturesIndex(Mapping):
+class CapturesIndex(Mapping):  # type: ignore[type-arg]
 
-    def __init__(self, redis: Redis, contextualizer: Optional[Context]=None):
+    def __init__(self, redis: Redis, contextualizer: Context | None=None) -> None:  # type: ignore[type-arg]
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(get_config('generic', 'loglevel'))
         self.redis = redis
         self.indexing = Indexing()
         self.contextualizer = contextualizer
-        self.__cache: Dict[str, CaptureCache] = {}
+        self.__cache: dict[str, CaptureCache] = {}
         self._quick_init()
         self.timeout = get_config('generic', 'max_tree_create_time')
         try:
-            self.ipasnhistory: Optional[IPASNHistory] = IPASNHistory()
+            self.ipasnhistory: IPASNHistory | None = IPASNHistory()
             if not self.ipasnhistory.is_up:
                 self.ipasnhistory = None
         except Exception as e:
@@ -169,7 +171,7 @@ class CapturesIndex(Mapping):
             self.logger.warning(f'Unable to setup IPASN History: {e}')
             self.ipasnhistory = None
         try:
-            self.cloudflare: Optional[Cloudflare] = Cloudflare()
+            self.cloudflare: Cloudflare | None = Cloudflare()
             if not self.cloudflare.available:
                 self.cloudflare = None
         except Exception as e:
@@ -177,7 +179,7 @@ class CapturesIndex(Mapping):
             self.cloudflare = None
 
     @property
-    def cached_captures(self) -> Set[str]:
+    def cached_captures(self) -> set[str]:
         self._quick_init()
         return set(self.__cache.keys())
 
@@ -199,10 +201,10 @@ class CapturesIndex(Mapping):
             self.__cache[uuid] = self._set_capture_cache(capture_dir)
         return self.__cache[uuid]
 
-    def __iter__(self):
-        return iter(self.__cache)
+    def __iter__(self) -> Iterator[dict[str, CaptureCache]]:
+        return iter(self.__cache)  # type: ignore[arg-type]
 
-    def __len__(self):
+    def __len__(self) -> int:
         return len(self.__cache)
 
     def reload_cache(self, uuid: str) -> None:
@@ -221,7 +223,7 @@ class CapturesIndex(Mapping):
         self.redis.flushdb()
         self.__cache = {}
 
-    def lru_cache_status(self):
+    def lru_cache_status(self) -> CacheInfo:
         return load_pickle_tree.cache_info()
 
     def _quick_init(self) -> None:
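Typing `lru_cache_status` requires `_CacheInfo`, imported above as `CacheInfo`: it is technically private to functools (note the underscore), but it is the actual NamedTuple that every `cache_info()` call returns, so there is no public spelling to use instead. What callers get, sketched:

    from functools import lru_cache, _CacheInfo as CacheInfo


    @lru_cache(maxsize=64)
    def square(x: int) -> int:
        return x * x


    def status() -> CacheInfo:
        # CacheInfo is a NamedTuple: (hits, misses, maxsize, currsize)
        return square.cache_info()


    square(2); square(2); square(3)
    print(status())  # CacheInfo(hits=1, misses=2, maxsize=64, currsize=2)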
@@ -332,11 +334,11 @@ class CapturesIndex(Mapping):
         return tree
 
     @staticmethod
-    def _raise_timeout(_, __):
+    def _raise_timeout(_, __) -> None:  # type: ignore[no-untyped-def]
         raise TimeoutError
 
     @contextlib.contextmanager
-    def _timeout_context(self):
+    def _timeout_context(self) -> Iterator[None]:
         if self.timeout != 0:
             # Register a function to raise a TimeoutError on the signal.
             signal.signal(signal.SIGALRM, self._raise_timeout)
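`_timeout_context` is a `@contextlib.contextmanager` generator, and the conventional annotation for a generator that yields exactly once with no value is `Iterator[None]`, which is why `Iterator` joined the typing imports earlier in this file. The SIGALRM shape it wraps, reduced to a self-contained sketch (POSIX-only; the surrounding tree-building logic is omitted and assumed):

    from __future__ import annotations

    import contextlib
    import signal
    from typing import Iterator


    def _raise_timeout(_, __) -> None:  # signal handlers receive (signum, frame)
        raise TimeoutError


    @contextlib.contextmanager
    def timeout_context(timeout: int) -> Iterator[None]:
        if timeout != 0:
            signal.signal(signal.SIGALRM, _raise_timeout)
            signal.alarm(timeout)  # deliver SIGALRM after `timeout` seconds
        try:
            yield
        finally:
            signal.alarm(0)  # always cancel any pending alarm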
@@ -378,7 +380,7 @@ class CapturesIndex(Mapping):
             logger.warning(f'Unable to rebuild the tree for {capture_dir}, the HAR files are broken.')
             tree = None
 
-        cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
+        cache: dict[str, str | int] = {'uuid': uuid, 'capture_dir': capture_dir_str}
         if capture_settings.get('url'):
             cache['url'] = capture_settings['url']
 
@@ -450,18 +452,18 @@ class CapturesIndex(Mapping):
         p.execute()
         return CaptureCache(cache)
 
-    def __resolve_dns(self, ct: CrawledTree, logger: LookylooCacheLogAdapter):
+    def __resolve_dns(self, ct: CrawledTree, logger: LookylooCacheLogAdapter) -> CrawledTree:
         '''Resolves all domains of the tree, keeps A (IPv4), AAAA (IPv6), and CNAME entries
         and store them in ips.json and cnames.json, in the capture directory.
         Updates the nodes of the tree accordingly so the information is available.
         '''
 
-        def _build_cname_chain(known_cnames: Dict[str, str], hostname) -> List[str]:
+        def _build_cname_chain(known_cnames: dict[str, str], hostname: str) -> list[str]:
             '''Returns a list of CNAMEs starting from one hostname.
             The CNAMEs resolutions are made in `_resolve_dns`. A hostname can have a CNAME entry
             and the CNAME entry can have an other CNAME entry, and so on multiple times.
             This method loops over the hostnames until there are no CNAMES.'''
-            cnames: List[str] = []
+            cnames: list[str] = []
             to_search = hostname
             while True:
                 if not known_cnames.get(to_search):
@@ -474,7 +476,7 @@ class CapturesIndex(Mapping):
         ips_path = ct.root_hartree.har.path.parent / 'ips.json'
         ipasn_path = ct.root_hartree.har.path.parent / 'ipasn.json'
 
-        host_cnames: Dict[str, str] = {}
+        host_cnames: dict[str, str] = {}
         if cnames_path.exists():
             try:
                 with cnames_path.open() as f:
@@ -483,7 +485,7 @@ class CapturesIndex(Mapping):
                 # The json is broken, delete and re-trigger the requests
                 host_cnames = {}
 
-        host_ips: Dict[str, Dict[str, Set[str]]] = {}
+        host_ips: dict[str, dict[str, set[str]]] = {}
         if ips_path.exists():
             try:
                 with ips_path.open() as f:
@@ -492,7 +494,7 @@ class CapturesIndex(Mapping):
                 # The json is broken, delete and re-trigger the requests
                 host_ips = {}
 
-        ipasn: Dict[str, Dict[str, str]] = {}
+        ipasn: dict[str, dict[str, str]] = {}
         if ipasn_path.exists():
             try:
                 with ipasn_path.open() as f:
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import fnmatch
 import logging
 
 from typing import Dict, Any, Union, List, Optional, TypedDict, Tuple
 
-from har2tree import URLNode
+from har2tree import URLNode  # type: ignore[attr-defined]
 
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
@@ -19,8 +21,8 @@ from .exceptions import MissingUUID, TreeNeedsRebuild
 class CompareSettings(TypedDict):
     '''The settings that can be passed to the compare method to filter out some differences'''
 
-    ressources_ignore_domains: Tuple[str, ...]
-    ressources_ignore_regexes: Tuple[str, ...]
+    ressources_ignore_domains: tuple[str, ...]
+    ressources_ignore_regexes: tuple[str, ...]
 
     ignore_ips: bool
 
@@ -39,16 +41,16 @@ class Comparator():
         self.public_domain = get_config('generic', 'public_domain')
 
     @property
-    def redis(self) -> Redis:
+    def redis(self) -> Redis:  # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)
 
-    def get_comparables_node(self, node: URLNode) -> Dict[str, str]:
+    def get_comparables_node(self, node: URLNode) -> dict[str, str]:
         to_return = {'url': node.name, 'hostname': node.hostname}
         if hasattr(node, 'ip_address'):
             to_return['ip_address'] = str(node.ip_address)
         return to_return
 
-    def _compare_nodes(self, left: Dict[str, str], right: Dict[str, str], /, different: bool, ignore_ips: bool) -> Tuple[bool, Dict[str, Any]]:
+    def _compare_nodes(self, left: dict[str, str], right: dict[str, str], /, different: bool, ignore_ips: bool) -> tuple[bool, dict[str, Any]]:
         to_return = {}
         # URL
         if left['url'] != right['url']:
@@ -78,12 +80,12 @@ class Comparator():
         # IPs in hostnode + ASNs
         return different, to_return
 
-    def get_comparables_capture(self, capture_uuid: str) -> Dict[str, Any]:
+    def get_comparables_capture(self, capture_uuid: str) -> dict[str, Any]:
         if capture_uuid not in self._captures_index:
             raise MissingUUID(f'{capture_uuid} does not exists.')
 
         capture = self._captures_index[capture_uuid]
-        to_return: Dict[str, Any]
+        to_return: dict[str, Any]
         try:
             if capture.error:
                 # The error on lookyloo is too verbose and contains the UUID of the capture, skip that.
@@ -108,17 +110,17 @@ class Comparator():
             to_return = {'error': str(e)}
         return to_return
 
-    def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Tuple[bool, Dict[str, Any]]:
+    def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: CompareSettings | None=None) -> tuple[bool, dict[str, Any]]:
         if capture_left not in self._captures_index:
             raise MissingUUID(f'{capture_left} does not exists.')
         if capture_right not in self._captures_index:
             raise MissingUUID(f'{capture_right} does not exists.')
 
         different: bool = False
-        to_return: Dict[str, Dict[str, Union[str,
-                                             List[Union[str, Dict[str, Any]]],
-                                             Dict[str, Union[int, str,
-                                                             List[Union[int, str, Dict[str, Any]]]]]]]] = {}
+        to_return: dict[str, dict[str, (str |
+                                        list[str | dict[str, Any]] |
+                                        dict[str, (int | str |
+                                                   list[int | str | dict[str, Any]])])]] = {}
         to_return['lookyloo_urls'] = {'left': f'https://{self.public_domain}/tree/{capture_left}',
                                       'right': f'https://{self.public_domain}/tree/{capture_right}'}
         left = self.get_comparables_capture(capture_left)
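The `to_return` annotation above is the densest rewrite in the commit: nested `Union`/`Dict`/`List` become nested `|` unions, with parentheses carrying the grouping across line breaks. Isolated, with the same shape (valid on Python 3.8 thanks to the future import, since the annotation is never evaluated):

    from __future__ import annotations

    from typing import Any

    to_return: dict[str, dict[str, (str |
                                    list[str | dict[str, Any]] |
                                    dict[str, (int | str |
                                               list[int | str | dict[str, Any]])])]] = {}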
@@ -192,7 +194,7 @@ class Comparator():
                                'details': left['redirects']['length']}
 
         # Prepare settings
-        _settings: Optional[CompareSettings]
+        _settings: CompareSettings | None
         if settings:
             # cleanup the settings
             _ignore_domains = set(settings['ressources_ignore_domains'] if settings.get('ressources_ignore_domains') else [])
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import json
 import logging
 
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Union
 from urllib.parse import urlsplit
 
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from redis import Redis
 
 from .default import get_config, get_homedir, get_socket_path
@@ -16,14 +18,14 @@ from .modules import SaneJavaScript
 
 class Context():
 
-    def __init__(self):
+    def __init__(self) -> None:
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(get_config('generic', 'loglevel'))
-        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)
+        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), db=1, decode_responses=True)  # type: ignore[type-arg]
         self._cache_known_content()
         self.sanejs = SaneJavaScript(config_name='SaneJS')
 
-    def clear_context(self):
+    def clear_context(self) -> None:
         self.redis.flushdb()
 
     def _cache_known_content(self) -> None:
@@ -55,13 +57,13 @@ class Context():
                 p.sadd(f'bh|{h}|legitimate', *details['hostnames'])
         p.execute()
 
-    def find_known_content(self, har2tree_container: Union[CrawledTree, HostNode, URLNode, str]) -> Dict[str, Any]:
+    def find_known_content(self, har2tree_container: CrawledTree | HostNode | URLNode | str) -> dict[str, Any]:
         """Return a dictionary of content resources found in the local known_content database, or in SaneJS (if enabled)"""
         if isinstance(har2tree_container, str):
-            to_lookup: Set[str] = {har2tree_container, }
+            to_lookup: set[str] = {har2tree_container, }
         else:
             to_lookup = get_resources_hashes(har2tree_container)
-        known_content_table: Dict[str, Any] = {}
+        known_content_table: dict[str, Any] = {}
         if not to_lookup:
             return known_content_table
         # get generic known content
@@ -113,7 +115,7 @@ class Context():
 
         return known_content_table
 
-    def store_known_legitimate_tree(self, tree: CrawledTree):
+    def store_known_legitimate_tree(self, tree: CrawledTree) -> None:
         known_content = self.find_known_content(tree)
         capture_file: Path = get_homedir() / 'known_content_user' / f'{urlsplit(tree.root_url).hostname}.json'
         if capture_file.exists():
@@ -156,7 +158,7 @@ class Context():
         with open(capture_file, 'w') as f:
             json.dump(to_store, f, indent=2, default=serialize_to_json)
 
-    def mark_as_legitimate(self, tree: CrawledTree, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> None:
+    def mark_as_legitimate(self, tree: CrawledTree, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> None:
         if hostnode_uuid:
             urlnodes = tree.root_hartree.get_host_node_by_uuid(hostnode_uuid).urls
         elif urlnode_uuid:
@@ -214,7 +216,7 @@ class Context():
     def legitimate_body(self, body_hash: str, legitimate_hostname: str) -> None:
         self.redis.sadd(f'bh|{body_hash}|legitimate', legitimate_hostname)
 
-    def store_known_malicious_ressource(self, ressource_hash: str, details: Dict[str, str]):
+    def store_known_malicious_ressource(self, ressource_hash: str, details: dict[str, str]) -> None:
         known_malicious_ressource_file = get_homedir() / 'known_content_user' / 'malicious.json'
         if known_malicious_ressource_file.exists():
             with open(known_malicious_ressource_file) as f:
@@ -236,7 +238,7 @@ class Context():
         with open(known_malicious_ressource_file, 'w') as f:
             json.dump(to_store, f, indent=2, default=serialize_to_json)
 
-    def add_malicious(self, ressource_hash: str, details: Dict[str, str]):
+    def add_malicious(self, ressource_hash: str, details: dict[str, str]) -> None:
         self.store_known_malicious_ressource(ressource_hash, details)
         p = self.redis.pipeline()
         p.sadd('bh|malicious', ressource_hash)
@@ -246,7 +248,7 @@ class Context():
             p.sadd(f'{ressource_hash}|tag', details['type'])
         p.execute()
 
-    def store_known_legitimate_ressource(self, ressource_hash: str, details: Dict[str, str]):
+    def store_known_legitimate_ressource(self, ressource_hash: str, details: dict[str, str]) -> None:
         known_legitimate_ressource_file = get_homedir() / 'known_content_user' / 'legitimate.json'
         if known_legitimate_ressource_file.exists():
             with open(known_legitimate_ressource_file) as f:
@@ -267,7 +269,7 @@ class Context():
         with open(known_legitimate_ressource_file, 'w') as f:
             json.dump(to_store, f, indent=2, default=serialize_to_json)
 
-    def add_legitimate(self, ressource_hash: str, details: Dict[str, str]):
+    def add_legitimate(self, ressource_hash: str, details: dict[str, str]) -> None:
|
def add_legitimate(self, ressource_hash: str, details: dict[str, str]) -> None:
|
||||||
self.store_known_legitimate_ressource(ressource_hash, details)
|
self.store_known_legitimate_ressource(ressource_hash, details)
|
||||||
if 'domain' in details:
|
if 'domain' in details:
|
||||||
self.redis.sadd(f'bh|{ressource_hash}|legitimate', details['domain'])
|
self.redis.sadd(f'bh|{ressource_hash}|legitimate', details['domain'])
|
||||||
|
@ -277,7 +279,7 @@ class Context():
|
||||||
|
|
||||||
# Query DB
|
# Query DB
|
||||||
|
|
||||||
def is_legitimate(self, urlnode: URLNode, known_hashes: Dict[str, Any]) -> Optional[bool]:
|
def is_legitimate(self, urlnode: URLNode, known_hashes: dict[str, Any]) -> bool | None:
|
||||||
"""
|
"""
|
||||||
If legitimate if generic, marked as legitimate or known on sanejs, loaded from the right domain
|
If legitimate if generic, marked as legitimate or known on sanejs, loaded from the right domain
|
||||||
3 cases:
|
3 cases:
|
||||||
|
@ -285,7 +287,7 @@ class Context():
|
||||||
* False if *any* content is malicious
|
* False if *any* content is malicious
|
||||||
* None in all other cases
|
* None in all other cases
|
||||||
"""
|
"""
|
||||||
status: List[Optional[bool]] = []
|
status: list[bool | None] = []
|
||||||
for h in urlnode.resources_hashes:
|
for h in urlnode.resources_hashes:
|
||||||
# Note: we can have multiple hashes on the same urlnode (see embedded resources).
|
# Note: we can have multiple hashes on the same urlnode (see embedded resources).
|
||||||
if h not in known_hashes:
|
if h not in known_hashes:
|
||||||
|
@ -305,7 +307,7 @@ class Context():
|
||||||
return True # All the contents are known legitimate
|
return True # All the contents are known legitimate
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def is_malicious(self, urlnode: URLNode, known_hashes: Dict[str, Any]) -> Optional[bool]:
|
def is_malicious(self, urlnode: URLNode, known_hashes: dict[str, Any]) -> bool | None:
|
||||||
"""3 cases:
|
"""3 cases:
|
||||||
* True if *any* content is malicious
|
* True if *any* content is malicious
|
||||||
* False if *all* the contents are known legitimate
|
* False if *all* the contents are known legitimate
|
||||||
|
|
|
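
The conversions in this file are the commit's core pattern: with `from __future__ import annotations` (PEP 563) every annotation becomes a lazily evaluated string, so the PEP 604 union syntax (`str | None`) and the PEP 585 builtin generics (`dict[str, Any]`, `set[str]`) can replace `typing.Optional`, `Union`, `Dict` and friends while the code still runs on older interpreters. A minimal sketch of the conversion on a hypothetical helper, not code from the repo:

from __future__ import annotations  # annotations become lazy strings (PEP 563)

# Before: typing-module generics
# from typing import Dict, List, Optional, Union
# def lookup(h: str, table: Optional[Dict[str, List[str]]]=None) -> Union[bool, None]: ...

# After: builtin generics (PEP 585) and union syntax (PEP 604)
def lookup(h: str, table: dict[str, list[str]] | None=None) -> bool | None:
    # Because the annotations are never evaluated at runtime, this definition
    # works on Python 3.8, where evaluating dict[str, list[str]] | None as an
    # expression would raise a TypeError.
    return bool(table and h in table)
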
@@ -16,3 +16,17 @@ from .exceptions import MissingEnv, CreateDirectoryException, ConfigError  # noqa
 from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file  # noqa
 
 os.chdir(get_homedir())
+
+__all__ = [
+    'LookylooException',
+    'AbstractManager',
+    'MissingEnv',
+    'CreateDirectoryException',
+    'ConfigError',
+    'get_homedir',
+    'load_configs',
+    'get_config',
+    'safe_create_dir',
+    'get_socket_path',
+    'try_make_file',
+]
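
The new `__all__` makes the package's re-exports explicit: a star import now binds exactly the listed names, and type checkers that disable implicit re-exports (mypy's `--no-implicit-reexport`) treat them as the public API. A short sketch of the effect, assuming a configured Lookyloo checkout:

from lookyloo.default import *  # binds only the names listed in __all__

# get_config, get_homedir, AbstractManager, ... are available here; modules
# the package imported for its own use (like os) are no longer leaked.
loglevel = get_config('generic', 'loglevel')
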
@@ -1,14 +1,16 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import asyncio
 import logging
+import logging.config
 import os
 import signal
 import time
 from abc import ABC
 from datetime import datetime, timedelta
 from subprocess import Popen
-from typing import List, Optional, Tuple
 
 from redis import Redis
 from redis.exceptions import ConnectionError as RedisConnectionError
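
An easy-to-miss addition in this hunk is `import logging.config`: importing `logging` alone does not load the `logging.config` submodule, so `logging.config.dictConfig(...)` can fail with an `AttributeError` unless some other module happened to import it first. For example:

import logging

# Whether this works depends on what was imported earlier in the process:
# logging.config.dictConfig(...)  # may raise AttributeError

import logging.config  # explicit submodule import, always safe

logging.config.dictConfig({'version': 1, 'disable_existing_loggers': False})
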
@@ -20,18 +22,18 @@ class AbstractManager(ABC):
 
     script_name: str
 
-    def __init__(self, loglevel: Optional[int]=None):
+    def __init__(self, loglevel: int | None=None):
         self.loglevel: int = loglevel if loglevel is not None else get_config('generic', 'loglevel') or logging.INFO
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(self.loglevel)
         self.logger.info(f'Initializing {self.__class__.__name__}')
-        self.process: Optional[Popen] = None
+        self.process: Popen | None = None  # type: ignore[type-arg]
         self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
 
         self.force_stop = False
 
     @staticmethod
-    def is_running() -> List[Tuple[str, float]]:
+    def is_running() -> list[tuple[str, float]]:
         try:
             r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
             for script_name, score in r.zrangebyscore('running', '-inf', '+inf', withscores=True):
@@ -52,7 +54,7 @@ class AbstractManager(ABC):
         return []
 
     @staticmethod
-    def clear_running():
+    def clear_running() -> None:
         try:
             r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
             r.delete('running')
@@ -60,14 +62,14 @@ class AbstractManager(ABC):
             print('Unable to connect to redis, the system is down.')
 
     @staticmethod
-    def force_shutdown():
+    def force_shutdown() -> None:
         try:
             r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
             r.set('shutdown', 1)
         except RedisConnectionError:
             print('Unable to connect to redis, the system is down.')
 
-    def set_running(self, number: Optional[int]=None) -> None:
+    def set_running(self, number: int | None=None) -> None:
         if number == 0:
             self.__redis.zrem('running', self.script_name)
         else:
@@ -111,7 +113,7 @@ class AbstractManager(ABC):
     def _to_run_forever(self) -> None:
         raise NotImplementedError('This method must be implemented by the child')
 
-    def _kill_process(self):
+    def _kill_process(self) -> None:
         if self.process is None:
             return
         kill_order = [signal.SIGWINCH, signal.SIGTERM, signal.SIGINT, signal.SIGKILL]
@@ -167,7 +169,7 @@ class AbstractManager(ABC):
     def _wait_to_finish(self) -> None:
         self.logger.info('Not implemented, nothing to wait for.')
 
-    async def stop(self):
+    async def stop(self) -> None:
         self.force_stop = True
 
     async def _to_run_forever_async(self) -> None:
@@ -176,7 +178,7 @@ class AbstractManager(ABC):
     async def _wait_to_finish_async(self) -> None:
         self.logger.info('Not implemented, nothing to wait for.')
 
-    async def stop_async(self):
+    async def stop_async(self) -> None:
         """Method to pass the signal handler:
             loop.add_signal_handler(signal.SIGTERM, lambda: loop.create_task(p.stop()))
         """
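
`AbstractManager` is the template for the long-running scripts: a subclass sets `script_name` and overrides `_to_run_forever()` (the base implementation raises `NotImplementedError`), while the base class provides the logger, the `running` sorted set in Redis and the shutdown handling. A minimal sketch of a consumer; the class name, the `heartbeat` label and the `run(sleep_in_sec=...)` entry point are assumptions for illustration, not code from the repo:

from __future__ import annotations

from lookyloo.default import AbstractManager


class HeartbeatManager(AbstractManager):  # hypothetical subclass

    def __init__(self, loglevel: int | None=None) -> None:
        super().__init__(loglevel)
        self.script_name = 'heartbeat'  # the name reported in the 'running' sorted set

    def _to_run_forever(self) -> None:
        # Called by the base class on every iteration until a shutdown is requested.
        self.logger.info('still alive')


if __name__ == '__main__':
    HeartbeatManager().run(sleep_in_sec=60)  # assumed driver method
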
@@ -1,4 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import json
 import logging
 import os
@@ -9,7 +12,7 @@ from typing import Any, Dict, Optional, Union
 from . import env_global_name
 from .exceptions import ConfigError, CreateDirectoryException, MissingEnv
 
-configs: Dict[str, Dict[str, Any]] = {}
+configs: dict[str, dict[str, Any]] = {}
 logger = logging.getLogger('Helpers')
 
 
@@ -34,7 +37,7 @@ Run the following command (assuming you run the code from the clonned repository
 
 
 @lru_cache(64)
-def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
+def load_configs(path_to_config_files: str | Path | None=None) -> None:
     global configs
     if configs:
         return
@@ -57,7 +60,7 @@ def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
 
 
 @lru_cache(64)
-def get_config(config_type: str, entry: Optional[str]=None, quiet: bool=False) -> Any:
+def get_config(config_type: str, entry: str | None=None, quiet: bool=False) -> Any:
     """Get an entry from the given config_type file. Automatic fallback to the sample file"""
     global configs
     if not configs:
@@ -97,7 +100,7 @@ def get_socket_path(name: str) -> str:
     return str(get_homedir() / mapping[name])
 
 
-def try_make_file(filename: Path):
+def try_make_file(filename: Path) -> bool:
     try:
         filename.touch(exist_ok=False)
         return True
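
Both `load_configs` and `get_config` sit behind `@lru_cache(64)`: a given `(config_type, entry, quiet)` combination reads the JSON files once and is served from memory afterwards, at the cost that edited config files are not picked up until the cache is cleared or the process restarts. A standalone sketch of that behaviour (not the repo's code):

from functools import lru_cache


@lru_cache(64)
def get_setting(name: str) -> str:
    print(f'reading {name} from disk')  # happens once per distinct name
    return 'value'


get_setting('loglevel')    # reads from disk
get_setting('loglevel')    # served from the cache, nothing printed
get_setting.cache_clear()  # required before a changed file is re-read
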
@@ -14,23 +14,22 @@ from typing import Any, Dict, List, Optional, Set, Union, Tuple
 from urllib.parse import urlparse
 
 
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from playwrightcapture import get_devices
 from publicsuffixlist import PublicSuffixList  # type: ignore
-from pytaxonomies import Taxonomies
+from pytaxonomies import Taxonomies  # type: ignore[attr-defined]
 from ua_parser import user_agent_parser  # type: ignore
 from werkzeug.user_agent import UserAgent
 from werkzeug.utils import cached_property
 
-from .default import get_homedir, safe_create_dir, get_config
-from .exceptions import LookylooException
+from .default import get_homedir, safe_create_dir, get_config, LookylooException
 
 logger = logging.getLogger('Lookyloo - Helpers')
 
 
 # This method is used in json.dump or json.dumps calls as the default parameter:
 # json.dumps(..., default=dump_to_json)
-def serialize_to_json(obj: Union[Set]) -> Union[List]:
+def serialize_to_json(obj: Union[Set[Any]]) -> Union[List[Any]]:
     if isinstance(obj, set):
         return sorted(obj)
@@ -52,12 +51,12 @@ def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNod
 
 
 @lru_cache(64)
-def get_taxonomies():
+def get_taxonomies() -> Taxonomies:
     return Taxonomies()
 
 
 @lru_cache(64)
-def get_public_suffix_list():
+def get_public_suffix_list() -> PublicSuffixList:
     """Initialize Public Suffix List"""
     # TODO (?): fetch the list
     return PublicSuffixList()
@@ -131,7 +130,7 @@ def get_sorted_captures_from_disk(captures_dir: Path, /, *,
 
 class UserAgents:
 
-    def __init__(self):
+    def __init__(self) -> None:
        if get_config('generic', 'use_user_agents_users'):
            self.path = get_homedir() / 'own_user_agents'
        else:
@@ -145,14 +144,14 @@ class UserAgents:
         self.playwright_devices = get_devices()
         self._load_newest_ua_file(ua_files_path[0])
 
-    def _load_newest_ua_file(self, path: Path):
+    def _load_newest_ua_file(self, path: Path) -> None:
         self.most_recent_ua_path = path
         with self.most_recent_ua_path.open() as f:
             self.most_recent_uas = json.load(f)
         self.by_freq = self.most_recent_uas.pop('by_frequency')
         self._load_playwright_devices()
 
-    def _load_playwright_devices(self):
+    def _load_playwright_devices(self) -> None:
         # Only get default and desktop for now.
         for device_name, details in self.playwright_devices['desktop']['default'].items():
             parsed_ua = ParsedUserAgent(details['user_agent'])
@@ -254,16 +253,17 @@ def load_cookies(cookie_pseudofile: Optional[Union[BufferedIOBase, str, bytes, L
     return to_return
 
 
-def uniq_domains(uniq_urls):
+def uniq_domains(uniq_urls: List[str]) -> Set[str]:
     domains = set()
     for url in uniq_urls:
         splitted = urlparse(url)
-        domains.add(splitted.hostname)
+        if splitted.hostname:
+            domains.add(splitted.hostname)
     return domains
 
 
 @lru_cache(64)
-def get_useragent_for_requests():
+def get_useragent_for_requests() -> str:
     return f'Lookyloo / {version("lookyloo")}'
 
 
@@ -331,11 +331,11 @@ class ParsedUserAgent(UserAgent):
     # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/
 
     @cached_property
-    def _details(self):
+    def _details(self) -> Dict[str, Any]:
         return user_agent_parser.Parse(self.string)
 
     @property
-    def platform(self):
+    def platform(self) -> Optional[str]:  # type: ignore[override]
         return self._details['os'].get('family')
 
     @property
@@ -343,11 +343,11 @@ class ParsedUserAgent(UserAgent):
         return self._aggregate_version(self._details['os'])
 
     @property
-    def browser(self):
+    def browser(self) -> Optional[str]:  # type: ignore[override]
         return self._details['user_agent'].get('family')
 
     @property
-    def version(self):
+    def version(self) -> Optional[str]:  # type: ignore[override]
         return self._aggregate_version(self._details['user_agent'])
 
     def _aggregate_version(self, details: Dict[str, str]) -> Optional[str]:
@@ -357,5 +357,5 @@ class ParsedUserAgent(UserAgent):
             if (part := details.get(key)) is not None
         )
 
-    def __str__(self):
+    def __str__(self) -> str:
         return f'OS: {self.platform} - Browser: {self.browser} {self.version} - UA: {self.string}'
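
`ParsedUserAgent`, shown above, combines werkzeug's `UserAgent` with `ua_parser` and caches the parse result per instance through `cached_property`. A usage sketch, assuming the class is importable from `lookyloo.helpers` and the ua-parser regex data is installed:

from lookyloo.helpers import ParsedUserAgent

ua = ParsedUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                     'AppleWebKit/537.36 (KHTML, like Gecko) '
                     'Chrome/120.0.0.0 Safari/537.36')
print(ua.platform)  # e.g. 'Windows'
print(ua.browser)   # e.g. 'Chrome'
print(ua.version)   # e.g. '120.0.0.0'
print(ua)           # 'OS: ... - Browser: ... - UA: ...', via __str__
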
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import hashlib
 import logging
 # import re
@@ -7,7 +9,7 @@ from collections import defaultdict
 from typing import Dict, Iterable, List, Optional, Set, Tuple
 from urllib.parse import urlsplit
 
-from har2tree import CrawledTree
+from har2tree import CrawledTree  # type: ignore[attr-defined]
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
 
@@ -23,11 +25,11 @@ class Indexing():
         self.redis_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection,
                                                          path=get_socket_path('indexing'), decode_responses=True)
 
-    def clear_indexes(self):
+    def clear_indexes(self) -> None:
         self.redis.flushdb()
 
     @property
-    def redis(self):
+    def redis(self) -> Redis:  # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)
 
     def new_internal_uuids(self, crawled_tree: CrawledTree) -> None:
@@ -45,25 +47,25 @@ class Indexing():
     # ###### Cookies ######
 
     @property
-    def cookies_names(self) -> List[Tuple[str, float]]:
+    def cookies_names(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)
 
     def cookies_names_number_domains(self, cookie_name: str) -> int:
         return self.redis.zcard(f'cn|{cookie_name}')
 
-    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
+    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)
 
-    def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
+    def get_cookie_domains(self, cookie_name: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)
 
-    def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
+    def get_cookies_names_captures(self, cookie_name: str) -> list[tuple[str, str]]:
         return [uuids.split('|') for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
 
     def _reindex_cookies_capture(self, crawled_tree: CrawledTree) -> None:
         pipeline = self.redis.pipeline()
-        already_loaded: Set[Tuple[str, str]] = set()
-        already_cleaned_up: Set[str] = set()
+        already_loaded: set[tuple[str, str]] = set()
+        already_cleaned_up: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'cookies_received' not in urlnode.features:
                 continue
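
The new `list[tuple[str, float]]` annotations match what redis-py returns from `zrevrange(..., withscores=True)` on a connection opened with `decode_responses=True`: decoded member strings paired with float scores, highest score first. A sketch against a local Redis (key and members are made up):

from redis import Redis

r = Redis(decode_responses=True)  # assumes a Redis reachable on localhost
r.zincrby('cookies_names', 1, 'session_id')
r.zincrby('cookies_names', 3, '_ga')
print(r.zrevrange('cookies_names', 0, -1, withscores=True))
# [('_ga', 3.0), ('session_id', 1.0)]
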
@@ -90,7 +92,7 @@ class Indexing():
         self.redis.sadd('indexed_cookies', crawled_tree.uuid)
 
         pipeline = self.redis.pipeline()
-        already_loaded: Set[Tuple[str, str]] = set()
+        already_loaded: set[tuple[str, str]] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'cookies_received' not in urlnode.features:
                 continue
@@ -131,13 +133,13 @@ class Indexing():
     # ###### Body hashes ######
 
     @property
-    def ressources(self) -> List[Tuple[str, float]]:
+    def ressources(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('body_hashes', 0, 200, withscores=True)
 
     def ressources_number_domains(self, h: str) -> int:
         return self.redis.zcard(f'bh|{h}')
 
-    def body_hash_fequency(self, body_hash: str) -> Dict[str, int]:
+    def body_hash_fequency(self, body_hash: str) -> dict[str, int]:
         pipeline = self.redis.pipeline()
         pipeline.zscore('body_hashes', body_hash)
         pipeline.zcard(f'bh|{body_hash}')
@@ -151,7 +153,7 @@ class Indexing():
 
     def _reindex_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
         # if the capture is regenerated, the hostnodes/urlnodes UUIDs are changed
-        cleaned_up_hashes: Set[str] = set()
+        cleaned_up_hashes: set[str] = set()
         pipeline = self.redis.pipeline()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             for h in urlnode.resources_hashes:
@@ -181,17 +183,17 @@ class Indexing():
                               f'{urlnode.uuid}|{urlnode.hostnode_uuid}|{urlnode.name}')
         pipeline.execute()
 
-    def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
+    def get_hash_uuids(self, body_hash: str) -> tuple[str, str, str]:
         """Use that to get a reference allowing to fetch a resource from one of the capture."""
-        capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures')
+        capture_uuid = str(self.redis.srandmember(f'bh|{body_hash}|captures'))
         entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
         urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
         return capture_uuid, urlnode_uuid, hostnode_uuid
 
-    def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
-                               filter_capture_uuid: Optional[str]=None,
+    def get_body_hash_captures(self, body_hash: str, filter_url: str | None=None,
+                               filter_capture_uuid: str | None=None,
                                limit: int=20,
-                               prefered_uuids: Set[str]=set()) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
+                               prefered_uuids: set[str]=set()) -> tuple[int, list[tuple[str, str, str, bool]]]:
         '''Get the captures matching the hash.
 
         :param filter_url: URL of the hash we're searching for
@@ -199,7 +201,7 @@ class Indexing():
         :param limit: Max matching captures to return, -1 means unlimited.
         :param prefered_uuids: UUID cached right now, so we don't rebuild trees.
         '''
-        to_return: List[Tuple[str, str, str, bool]] = []
+        to_return: list[tuple[str, str, str, bool]] = []
         len_captures = self.redis.scard(f'bh|{body_hash}|captures')
         unlimited = False
         if limit == -1:
@@ -224,11 +226,11 @@ class Indexing():
                     break
         return len_captures, to_return
 
-    def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]:
+    def get_body_hash_domains(self, body_hash: str) -> list[tuple[str, float]]:
         return self.redis.zrevrange(f'bh|{body_hash}', 0, -1, withscores=True)
 
-    def get_body_hash_urls(self, body_hash: str) -> Dict[str, List[Dict[str, str]]]:
-        all_captures: Set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
+    def get_body_hash_urls(self, body_hash: str) -> dict[str, list[dict[str, str]]]:
+        all_captures: set[str] = self.redis.smembers(f'bh|{body_hash}|captures')
         urls = defaultdict(list)
         for capture_uuid in list(all_captures):
             for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
@@ -239,19 +241,19 @@ class Indexing():
     # ###### HTTP Headers Hashes ######
 
     @property
-    def http_headers_hashes(self) -> List[Tuple[str, float]]:
+    def http_headers_hashes(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('hhhashes', 0, -1, withscores=True)
 
     def http_headers_hashes_number_captures(self, hhh: str) -> int:
         return self.redis.scard(f'hhhashes|{hhh}|captures')
 
-    def get_http_headers_hashes_captures(self, hhh: str) -> List[Tuple[str, str]]:
+    def get_http_headers_hashes_captures(self, hhh: str) -> list[tuple[str, str]]:
         return [uuids.split('|') for uuids in self.redis.smembers(f'hhhashes|{hhh}|captures')]
 
     def _reindex_http_headers_hashes_capture(self, crawled_tree: CrawledTree) -> None:
         pipeline = self.redis.pipeline()
-        already_loaded: Set[str] = set()
-        already_cleaned_up: Set[str] = set()
+        already_loaded: set[str] = set()
+        already_cleaned_up: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'hhhash' not in urlnode.features:
                 continue
@@ -276,7 +278,7 @@ class Indexing():
         self.redis.sadd('indexed_hhhashes', crawled_tree.uuid)
 
         pipeline = self.redis.pipeline()
-        already_loaded: Set[str] = set()
+        already_loaded: set[str] = set()
         for urlnode in crawled_tree.root_hartree.url_tree.traverse():
             if 'hhhash' not in urlnode.features:
                 continue
@@ -291,11 +293,11 @@ class Indexing():
     # ###### URLs and Domains ######
 
     @property
-    def urls(self) -> List[Tuple[str, float]]:
+    def urls(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('urls', 0, 200, withscores=True)
 
     @property
-    def hostnames(self) -> List[Tuple[str, float]]:
+    def hostnames(self) -> list[tuple[str, float]]:
         return self.redis.zrevrange('hostnames', 0, 200, withscores=True)
 
     def index_url_capture(self, crawled_tree: CrawledTree) -> None:
@@ -316,21 +318,21 @@ class Indexing():
             pipeline.sadd(f'urls|{md5}|captures', crawled_tree.uuid)
         pipeline.execute()
 
-    def get_captures_url(self, url: str) -> Set[str]:
+    def get_captures_url(self, url: str) -> set[str]:
         md5 = hashlib.md5(url.encode()).hexdigest()
         return self.redis.smembers(f'urls|{md5}|captures')
 
-    def get_captures_hostname(self, hostname: str) -> Set[str]:
+    def get_captures_hostname(self, hostname: str) -> set[str]:
         return self.redis.smembers(f'hostnames|{hostname}|captures')
 
     # ###### Categories ######
 
     @property
-    def categories(self) -> List[Tuple[str, int]]:
+    def categories(self) -> list[tuple[str, int]]:
         return [(c, int(score))
                 for c, score in self.redis.zrevrange('categories', 0, 200, withscores=True)]
 
-    def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]):
+    def index_categories_capture(self, capture_uuid: str, categories: Iterable[str]) -> None:
         if not categories:
             return
         if self.redis.sismember('indexed_categories', capture_uuid):
@@ -345,5 +347,5 @@ class Indexing():
             pipeline.sadd(category, capture_uuid)
         pipeline.execute()
 
-    def get_captures_category(self, category: str) -> Set[str]:
+    def get_captures_category(self, category: str) -> set[str]:
         return self.redis.smembers(category)
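
Throughout `Indexing`, writes are batched through a redis pipeline: commands are buffered client-side and flushed by a single `execute()`, one network round trip instead of one per `sadd`/`zincrby`. Reduced to a standalone sketch (key names are illustrative, not the ones used above):

from __future__ import annotations

from redis import Redis


def index_items(redis: Redis, capture_uuid: str, items: list[str]) -> None:
    pipeline = redis.pipeline()
    for item in items:
        pipeline.zincrby('items', 1, item)                     # global frequency counter
        pipeline.sadd(f'items|{item}|captures', capture_uuid)  # reverse index
    pipeline.execute()  # single round trip for the whole batch
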
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import base64
 import copy
 import gzip
@@ -22,7 +24,7 @@ from uuid import uuid4
 from zipfile import ZipFile
 
 from defang import defang  # type: ignore
-from har2tree import CrawledTree, HostNode, URLNode
+from har2tree import CrawledTree, HostNode, URLNode  # type: ignore[attr-defined]
 from lacuscore import (LacusCore,
                        CaptureStatus as CaptureStatusCore,
                        # CaptureResponse as CaptureResponseCore)
@@ -30,15 +32,15 @@ from lacuscore import (LacusCore,
                        CaptureSettings as CaptureSettingsCore)
 from PIL import Image, UnidentifiedImageError
 from playwrightcapture import get_devices
-from pylacus import (PyLacus,
+from pylacus import (PyLacus,  # type: ignore[attr-defined]
                      CaptureStatus as CaptureStatusPy
                      # CaptureResponse as CaptureResponsePy,
                      # CaptureResponseJson as CaptureResponseJsonPy,
                      # CaptureSettings as CaptureSettingsPy
                      )
-from pymisp import MISPAttribute, MISPEvent, MISPObject
-from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable
-from pylookyloomonitoring import PyLookylooMonitoring
+from pymisp import MISPAttribute, MISPEvent, MISPObject  # type: ignore[attr-defined]
+from pysecuritytxt import PySecurityTXT, SecurityTXTNotAvailable  # type: ignore[attr-defined]
+from pylookyloomonitoring import PyLookylooMonitoring  # type: ignore[attr-defined]
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
 
@@ -62,13 +64,13 @@ if TYPE_CHECKING:
 
 class CaptureSettings(CaptureSettingsCore, total=False):
     '''The capture settings that can be passed to Lookyloo'''
-    listing: Optional[int]
-    not_queued: Optional[int]
-    auto_report: Optional[Union[bool, str, Dict[str, str]]]
-    dnt: Optional[str]
-    browser_name: Optional[str]
-    os: Optional[str]
-    parent: Optional[str]
+    listing: int | None
+    not_queued: int | None
+    auto_report: bool | str | dict[str, str] | None
+    dnt: str | None
+    browser_name: str | None
+    os: str | None
+    parent: str | None
 
 
 class Lookyloo():
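
`CaptureSettings` extends the lacuscore settings with Lookyloo-specific keys, and `total=False` marks every key declared in this subclass as optional, which is `TypedDict` semantics. A standalone sketch with a stand-in base class:

from __future__ import annotations

from typing import TypedDict


class BaseSettings(TypedDict):  # stand-in for CaptureSettingsCore
    url: str


class Settings(BaseSettings, total=False):
    listing: int | None
    parent: str | None


ok: Settings = {'url': 'https://example.com'}                  # optional keys may be absent
full: Settings = {'url': 'https://example.com', 'listing': 1}  # or present
# {'listing': 1} alone would be rejected by a type checker: 'url' is required
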
@@ -153,13 +155,13 @@ class Lookyloo():
         self.lacus
 
     @property
-    def redis(self):
+    def redis(self) -> Redis:  # type: ignore[type-arg]
         return Redis(connection_pool=self.redis_pool)
 
     @cached_property
-    def lacus(self):
+    def lacus(self) -> PyLacus | LacusCore:
         has_remote_lacus = False
-        self._lacus: Union[PyLacus, LacusCore]
+        self._lacus: PyLacus | LacusCore
         if get_config('generic', 'remote_lacus'):
             remote_lacus_config = get_config('generic', 'remote_lacus')
             if remote_lacus_config.get('enable'):
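
`lacus` is a `cached_property`, so the remote-versus-local decision runs once per `Lookyloo` instance: with the `remote_lacus` config enabled it returns a `PyLacus` client, otherwise an in-process `LacusCore`, and the new `PyLacus | LacusCore` return annotation spells out that duality. The dispatch pattern, reduced to a runnable sketch with placeholder classes:

from __future__ import annotations

from functools import cached_property


class RemoteBackend:  # stand-ins for PyLacus / LacusCore
    pass


class LocalBackend:
    pass


class Service:
    def __init__(self, remote_enabled: bool) -> None:
        self.remote_enabled = remote_enabled

    @cached_property
    def backend(self) -> RemoteBackend | LocalBackend:
        # Evaluated on first access only; the result is stored on the instance.
        return RemoteBackend() if self.remote_enabled else LocalBackend()


print(type(Service(remote_enabled=False).backend).__name__)  # LocalBackend
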
@@ -180,7 +182,7 @@ class Lookyloo():
 
         if not has_remote_lacus:
             # We need a redis connector that doesn't decode.
-            redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
+            redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))  # type: ignore[type-arg]
             self._lacus = LacusCore(redis, tor_proxy=get_config('generic', 'tor_proxy'),
                                     max_capture_time=get_config('generic', 'max_capture_time'),
                                     only_global_lookups=get_config('generic', 'only_global_lookups'),
@@ -188,14 +190,14 @@ class Lookyloo():
         return self._lacus
 
     def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str,
-                    legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
+                    legitimate: bool, malicious: bool, details: dict[str, dict[str, str]]) -> None:
         '''Adds context information to a capture or a URL node'''
         if malicious:
             self.context.add_malicious(ressource_hash, details['malicious'])
         if legitimate:
             self.context.add_legitimate(ressource_hash, details['legitimate'])
 
-    def add_to_legitimate(self, capture_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None):
+    def add_to_legitimate(self, capture_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> None:
         '''Mark a full capture as legitimate.
         Iterates over all the nodes and mark them all as legitimate too.'''
         ct = self.get_crawled_tree(capture_uuid)
@@ -225,12 +227,12 @@ class Lookyloo():
         ct = self.get_crawled_tree(capture_uuid)
         return ct.root_hartree.get_host_node_by_uuid(node_uuid)
 
-    def get_statistics(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def get_statistics(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get the statistics of a capture.'''
         ct = self.get_crawled_tree(capture_uuid)
         return ct.root_hartree.stats
 
-    def get_info(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def get_info(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get basic information about the capture.'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
@@ -254,7 +256,7 @@ class Lookyloo():
         to_return['referer'] = cache.referer if cache.referer else ''
         return to_return
 
-    def get_meta(self, capture_uuid: str, /) -> Dict[str, str]:
+    def get_meta(self, capture_uuid: str, /) -> dict[str, str]:
         '''Get the meta informations from a capture (mostly, details about the User Agent used.)'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
@@ -294,7 +296,7 @@ class Lookyloo():
                 return json.load(f)
         return {}
 
-    def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]:
+    def categories_capture(self, capture_uuid: str, /) -> dict[str, Any]:
         '''Get all the categories related to a capture, in MISP Taxonomies format'''
         categ_file = self._captures_index[capture_uuid].capture_dir / 'categories'
         # get existing categories if possible
@@ -337,7 +339,7 @@ class Lookyloo():
         with categ_file.open('w') as f:
             f.writelines(f'{t}\n' for t in current_categories)
 
-    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> dict[str, Any]:
         '''Launch the 3rd party modules on a capture.
         It uses the cached result *if* the module was triggered the same day.
         The `force` flag re-triggers the module regardless of the cache.'''
@@ -350,8 +352,8 @@ class Lookyloo():
         self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
         self.hashlookup.capture_default_trigger(ct, auto_trigger=auto_trigger)
 
-        to_return: Dict[str, Dict] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {},
+        to_return: dict[str, dict[str, Any]] = {'PhishingInitiative': {}, 'VirusTotal': {}, 'UrlScan': {},
                                       'URLhaus': {}}
         if cache := self.capture_cache(capture_uuid):
             to_return['PhishingInitiative'] = self.pi.capture_default_trigger(cache, force=force, auto_trigger=auto_trigger)
             to_return['VirusTotal'] = self.vt.capture_default_trigger(cache, force=force, auto_trigger=auto_trigger)
@@ -363,7 +365,7 @@ class Lookyloo():
             to_return['URLhaus'] = self.urlhaus.capture_default_trigger(cache, auto_trigger=auto_trigger)
         return to_return
 
-    def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
+    def get_modules_responses(self, capture_uuid: str, /) -> dict[str, Any] | None:
         '''Get the responses of the modules from the cached responses on the disk'''
         cache = self.capture_cache(capture_uuid)
         if not cache:
@@ -373,7 +375,7 @@ class Lookyloo():
             self.logger.warning(f'The capture {capture_uuid} does not have a URL in the cache, it is broken.')
             return None
 
-        to_return: Dict[str, Any] = {}
+        to_return: dict[str, Any] = {}
         if self.vt.available:
             to_return['vt'] = {}
             if hasattr(cache, 'redirects') and cache.redirects:
@@ -416,7 +418,7 @@ class Lookyloo():
                 to_return['urlscan']['result'] = result
         return to_return
 
-    def get_historical_lookups(self, capture_uuid: str, /, force: bool=False) -> Dict:
+    def get_historical_lookups(self, capture_uuid: str, /, force: bool=False) -> dict[str, Any]:
         # this method is only trigered when the user wants to get more details about the capture
         # by looking at Passive DNS systems, check if there are hits in the current capture
         # in another one and things like that. The trigger_modules method is for getting
@@ -425,7 +427,7 @@ class Lookyloo():
         if not cache:
             self.logger.warning(f'Unable to get the modules responses unless the capture {capture_uuid} is cached')
             return {}
-        to_return: Dict[str, Any] = defaultdict(dict)
+        to_return: dict[str, Any] = defaultdict(dict)
         if self.riskiq.available:
             try:
                 self.riskiq.capture_default_trigger(cache)
@@ -461,7 +463,7 @@ class Lookyloo():
     def update_tree_cache_info(self, process_id: int, classname: str) -> None:
         self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status()))
 
-    def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]:
+    def sorted_capture_cache(self, capture_uuids: Iterable[str] | None=None, cached_captures_only: bool=True, index_cut_time: datetime | None=None) -> list[CaptureCache]:
         '''Get all the captures in the cache, sorted by timestamp (new -> old).
         By default, this method will only return the captures that are currently cached.'''
         # Make sure we do not try to load archived captures that would still be in 'lookup_dirs'
@@ -489,13 +491,13 @@ class Lookyloo():
             # Do not try to build pickles
             capture_uuids = set(capture_uuids) & self._captures_index.cached_captures
 
-        all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
+        all_cache: list[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
                                          if self.capture_cache(uuid)
                                          and hasattr(self._captures_index[uuid], 'timestamp')]
         all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
         return all_cache
 
-    def get_capture_status(self, capture_uuid: str, /) -> Union[CaptureStatusCore, CaptureStatusPy]:
+    def get_capture_status(self, capture_uuid: str, /) -> CaptureStatusCore | CaptureStatusPy:
         '''Returns the status (queued, ongoing, done, or UUID unknown)'''
         if self.redis.hexists('lookup_dirs', capture_uuid):
             return CaptureStatusCore.DONE
@@ -520,7 +522,7 @@ class Lookyloo():
             return CaptureStatusCore.ONGOING
         return lacus_status
 
-    def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> Optional[CaptureCache]:
+    def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> CaptureCache | None:
         """Get the cache from redis, rebuild the tree if the internal UUID changed => slow"""
         try:
             cache = self._captures_index[capture_uuid]
@@ -598,7 +600,7 @@ class Lookyloo():
             query['user_agent'] = user_agent if user_agent else self.user_agents.default['useragent']
 
         # NOTE: the document must be base64 encoded
-        document: Optional[Union[str, bytes]] = query.pop('document', None)
+        document: str | bytes | None = query.pop('document', None)
         if document:
             if isinstance(document, bytes):
                 query['document'] = base64.b64encode(document).decode()
@@ -631,17 +633,16 @@ class Lookyloo():
         query = self._prepare_lacus_query(query)
 
         priority = get_priority(source, user, authenticated)
-        query['priority'] = priority
         if priority < -100:
             # Someone is probably abusing the system with useless URLs, remove them from the index
             query['listing'] = 0
         try:
-            perma_uuid = self.lacus.enqueue(
+            perma_uuid = self.lacus.enqueue(  # type: ignore[misc]
                 url=query.get('url', None),
                 document_name=query.get('document_name', None),
                 document=query.get('document', None),
                 # depth=query.get('depth', 0),
-                browser=query.get('browser', None),
+                browser=query.get('browser', None),  # type: ignore[arg-type]
                 device_name=query.get('device_name', None),
                 user_agent=query.get('user_agent', None),
                 proxy=self.global_proxy if self.global_proxy else query.get('proxy', None),
@@ -659,7 +660,7 @@ class Lookyloo():
                 with_favicon=query.get('with_favicon', True),
                 # force=query.get('force', False),
                 # recapture_interval=query.get('recapture_interval', 300),
-                priority=query.get('priority', 0)
+                priority=priority
             )
         except Exception as e:
             self.logger.critical(f'Unable to enqueue capture: {e}')
@@ -670,7 +671,7 @@ class Lookyloo():
                 and self.redis.zscore('to_capture', perma_uuid) is None):  # capture ongoing
 
             # Make the settings redis compatible
-            mapping_capture: Dict[str, Union[bytes, float, int, str]] = {}
+            mapping_capture: dict[str, bytes | float | int | str] = {}
             for key, value in query.items():
                 if isinstance(value, bool):
                     mapping_capture[key] = 1 if value else 0
@ -681,15 +682,15 @@ class Lookyloo():
|
||||||
mapping_capture[key] = value # type: ignore
|
mapping_capture[key] = value # type: ignore
|
||||||
|
|
||||||
p = self.redis.pipeline()
|
p = self.redis.pipeline()
|
||||||
p.zadd('to_capture', {perma_uuid: query['priority']})
|
p.zadd('to_capture', {perma_uuid: priority})
|
||||||
p.hset(perma_uuid, mapping=mapping_capture)
|
p.hset(perma_uuid, mapping=mapping_capture) # type: ignore[arg-type]
|
||||||
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
||||||
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
||||||
p.execute()
|
p.execute()
|
||||||
|
|
||||||
return perma_uuid
|
return perma_uuid
|
||||||
|
|
||||||
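Note on the queueing change above: the priority no longer rides along inside the capture settings (`query['priority']`); it is passed to `enqueue()` directly and reused as the score of the `to_capture` sorted set, so Redis itself keeps the queue ordered. A minimal sketch of the idiom with redis-py (hypothetical UUIDs, and a plain TCP connection rather than Lookyloo's unix-socket setup):

    from redis import Redis

    r = Redis()  # assumption: a local Redis reachable on the default port

    # The sorted-set score is the capture priority.
    r.zadd('to_capture', {'uuid-1234': 10, 'uuid-5678': -50})
    # A worker pops the highest-priority capture first.
    uuid, priority = r.zpopmax('to_capture')[0]
    # zscore() doubles as an "is it still queued?" check: None means gone.
    assert r.zscore('to_capture', 'uuid-1234') is None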
-    def takedown_details(self, hostnode: HostNode) -> Dict[str, Any]:
+    def takedown_details(self, hostnode: HostNode) -> dict[str, Any]:
         if not self.uwhois.available:
             self.logger.warning('UWhois module not enabled, unable to use this method')
             raise LookylooException('UWhois module not enabled, unable to use this method')
@@ -740,7 +741,7 @@ class Lookyloo():
         to_return['all_emails'] = list(to_return['all_emails'])
         return to_return

-    def contacts(self, capture_uuid: str, /) -> List[Dict[str, Any]]:
+    def contacts(self, capture_uuid: str, /) -> list[dict[str, Any]]:
         capture = self.get_crawled_tree(capture_uuid)
         rendered_hostnode = self.get_hostnode_from_tree(capture_uuid, capture.root_hartree.rendered_node.hostnode_uuid)
         result = []
@@ -749,7 +750,7 @@ class Lookyloo():
         result.append(self.takedown_details(rendered_hostnode))
         return result

-    def send_mail(self, capture_uuid: str, /, email: str='', comment: Optional[str]=None) -> None:
+    def send_mail(self, capture_uuid: str, /, email: str='', comment: str | None=None) -> None:
         '''Send an email notification regarding a specific capture'''
         if not get_config('generic', 'enable_mail_notification'):
             return
@@ -856,7 +857,7 @@ class Lookyloo():
     def get_potential_favicons(self, capture_uuid: str, /, all_favicons: Literal[True], for_datauri: Literal[False]) -> BytesIO:
         ...

-    def get_potential_favicons(self, capture_uuid: str, /, all_favicons: bool=False, for_datauri: bool=False) -> Union[BytesIO, str]:
+    def get_potential_favicons(self, capture_uuid: str, /, all_favicons: bool=False, for_datauri: bool=False) -> BytesIO | str:
         '''Get rendered HTML'''
         fav = self._get_raw(capture_uuid, 'potential_favicons.ico', all_favicons)
         if not all_favicons and for_datauri:
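Note: the bare `...` bodies above are `@overload` stubs; a single implementation underneath does the actual work while the `Literal[...]` signatures let the type checker narrow the return type per call site. A self-contained sketch of the same pattern (hypothetical `get_favicon` function):

    from __future__ import annotations

    from io import BytesIO
    from typing import Literal, overload


    @overload
    def get_favicon(for_datauri: Literal[True]) -> str: ...
    @overload
    def get_favicon(for_datauri: Literal[False]) -> BytesIO: ...
    def get_favicon(for_datauri: bool = False) -> BytesIO | str:
        # One runtime implementation; the stubs above only guide the checker,
        # which picks the overload matching the Literal argument.
        raw = b'\x00\x01'
        return raw.hex() if for_datauri else BytesIO(raw)

With this, `get_favicon(True)` type-checks as `str` and `get_favicon(False)` as `BytesIO`, with no casts at the call sites.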
@@ -867,7 +868,7 @@ class Lookyloo():
         '''Get rendered HTML'''
         return self._get_raw(capture_uuid, 'html', all_html)

-    def get_data(self, capture_uuid: str, /) -> Tuple[str, BytesIO]:
+    def get_data(self, capture_uuid: str, /) -> tuple[str, BytesIO]:
         '''Get the data'''
         return self._get_raw(capture_uuid, 'data.filename', False).getvalue().decode(), self._get_raw(capture_uuid, 'data', False)

@@ -879,7 +880,7 @@ class Lookyloo():
         '''Get the screenshot(s) of the rendered page'''
         return self._get_raw(capture_uuid, 'png', all_files=False)

-    def get_screenshot_thumbnail(self, capture_uuid: str, /, for_datauri: bool=False, width: int=64) -> Union[str, BytesIO]:
+    def get_screenshot_thumbnail(self, capture_uuid: str, /, for_datauri: bool=False, width: int=64) -> str | BytesIO:
         '''Get the thumbnail of the rendered page. Always crop to a square.'''
         to_return = BytesIO()
         size = width, width
@@ -921,12 +922,12 @@ class Lookyloo():
         '''Get all the files related to this capture.'''
         return self._get_raw(capture_uuid)

-    def get_urls_rendered_page(self, capture_uuid: str, /) -> List[str]:
+    def get_urls_rendered_page(self, capture_uuid: str, /) -> list[str]:
         ct = self.get_crawled_tree(capture_uuid)
         return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page)
                       - set(ct.root_hartree.all_url_requests.keys()))

-    def get_body_hash_investigator(self, body_hash: str, /) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float]]]:
+    def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float]]]:
         '''Returns all the captures related to a hash (sha512), used in the web interface.'''
         total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1)
         cached_captures = self.sorted_capture_cache([d[0] for d in details])
@@ -934,7 +935,7 @@ class Lookyloo():
         domains = self.indexing.get_body_hash_domains(body_hash)
         return captures, domains

-    def get_body_hash_full(self, body_hash: str, /) -> Tuple[Dict[str, List[Dict[str, str]]], BytesIO]:
+    def get_body_hash_full(self, body_hash: str, /) -> tuple[dict[str, list[dict[str, str]]], BytesIO]:
         '''Returns a lot of information about the hash (sha512) and the hits in the instance.
         Also contains the data (base64 encoded)'''
         details = self.indexing.get_body_hash_urls(body_hash)
@@ -969,9 +970,9 @@ class Lookyloo():
         # TODO: Couldn't find the file anywhere. Maybe return a warning in the file?
         return details, BytesIO()

-    def get_all_body_hashes(self, capture_uuid: str, /) -> Dict[str, Dict[str, Union[URLNode, int]]]:
+    def get_all_body_hashes(self, capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]:
         ct = self.get_crawled_tree(capture_uuid)
-        to_return: Dict[str, Dict[str, Union[URLNode, int]]] = defaultdict()
+        to_return: dict[str, dict[str, URLNode | int]] = defaultdict()
         for node in ct.root_hartree.url_tree.traverse():
             if node.empty_response or node.body_hash in to_return:
                 # If we have the same hash more than once, skip
@@ -981,24 +982,24 @@ class Lookyloo():
             to_return[node.body_hash] = {'node': node, 'total_captures': total_captures}
         return to_return

-    def get_latest_url_capture(self, url: str, /) -> Optional[CaptureCache]:
+    def get_latest_url_capture(self, url: str, /) -> CaptureCache | None:
         '''Get the most recent capture with this URL'''
         captures = self.sorted_capture_cache(self.indexing.get_captures_url(url))
         if captures:
             return captures[0]
         return None

-    def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> List[Dict]:
+    def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
         '''Get the most recent captures and URL nodes where the URL has been seen.'''
         captures = self.sorted_capture_cache(self.indexing.get_captures_url(url), cached_captures_only=cached_captures_only)

-        to_return: List[Dict] = []
+        to_return: list[dict[str, Any]] = []
         for capture in captures[:limit]:
             ct = self.get_crawled_tree(capture.uuid)
-            to_append: Dict[str, Union[str, Dict]] = {'capture_uuid': capture.uuid,
+            to_append: dict[str, str | dict[str, Any]] = {'capture_uuid': capture.uuid,
                 'start_timestamp': capture.timestamp.isoformat(),
                 'title': capture.title}
-            urlnodes: Dict[str, Dict[str, str]] = {}
+            urlnodes: dict[str, dict[str, str]] = {}
             for urlnode in ct.root_hartree.url_tree.search_nodes(name=url):
                 urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(),
                     'hostnode_uuid': urlnode.hostnode_uuid}
@@ -1008,19 +1009,20 @@ class Lookyloo():
             to_return.append(to_append)
         return to_return

-    def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> List[Dict]:
+    def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
         '''Get the most recent captures and URL nodes where the hostname has been seen.'''
         captures = self.sorted_capture_cache(self.indexing.get_captures_hostname(hostname), cached_captures_only=cached_captures_only)

-        to_return: List[Dict] = []
+        to_return: list[dict[str, Any]] = []
         for capture in captures[:limit]:
             ct = self.get_crawled_tree(capture.uuid)
-            to_append: Dict[str, Union[str, List, Dict]] = {'capture_uuid': capture.uuid,
-                'start_timestamp': capture.timestamp.isoformat(),
-                'title': capture.title}
-            hostnodes: List[str] = []
+            to_append: dict[str, str | list[Any] | dict[str, Any]] = {
+                'capture_uuid': capture.uuid,
+                'start_timestamp': capture.timestamp.isoformat(),
+                'title': capture.title}
+            hostnodes: list[str] = []
             if with_urls_occurrences:
-                urlnodes: Dict[str, Dict[str, str]] = {}
+                urlnodes: dict[str, dict[str, str]] = {}
             for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname):
                 hostnodes.append(hostnode.uuid)
                 if with_urls_occurrences:
@@ -1036,7 +1038,7 @@ class Lookyloo():
             to_return.append(to_append)
         return to_return

-    def get_cookie_name_investigator(self, cookie_name: str, /) -> Tuple[List[Tuple[str, str]], List[Tuple[str, float, List[Tuple[str, float]]]]]:
+    def get_cookie_name_investigator(self, cookie_name: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float, list[tuple[str, float]]]]]:
         '''Returns all the captures related to a cookie name entry, used in the web interface.'''
         cached_captures = self.sorted_capture_cache([entry[0] for entry in self.indexing.get_cookies_names_captures(cookie_name)])
         captures = [(cache.uuid, cache.title) for cache in cached_captures]
@@ -1044,7 +1046,7 @@ class Lookyloo():
                    for domain, freq in self.indexing.get_cookie_domains(cookie_name)]
         return captures, domains

-    def get_hhh_investigator(self, hhh: str, /) -> Tuple[List[Tuple[str, str, str, str]], List[Tuple[str, str]]]:
+    def get_hhh_investigator(self, hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
         '''Returns all the captures related to a cookie name entry, used in the web interface.'''
         all_captures = dict(self.indexing.get_http_headers_hashes_captures(hhh))
         if cached_captures := self.sorted_capture_cache([entry for entry in all_captures]):
@@ -1063,11 +1065,11 @@ class Lookyloo():
             return captures, headers
         return [], []

-    def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Tuple[int, Dict[str, List[Tuple[str, str, str, str, str]]]]:
+    def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]:
         '''Search all the captures a specific hash was seen.
         If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
         Capture UUID avoids duplicates on the same capture'''
-        captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
+        captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
         total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
                                                                        prefered_uuids=set(self._captures_index.keys()))
         for h_capture_uuid, url_uuid, url_hostname, same_url in details:
@@ -1082,7 +1084,7 @@ class Lookyloo():
         captures_list['different_url'].sort(key=lambda y: y[3])
         return total_captures, captures_list

-    def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: Optional[str]) -> Optional[Tuple[str, BytesIO, str]]:
+    def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: str | None) -> tuple[str, BytesIO, str] | None:
         '''Get a specific resource from a URL node. If a hash s also given, we want an embeded resource'''
         try:
             url = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
@@ -1108,7 +1110,7 @@ class Lookyloo():
             return 'embedded_ressource.bin', BytesIO(blob.getvalue()), mimetype
         return None

-    def __misp_add_vt_to_URLObject(self, obj: MISPObject) -> Optional[MISPObject]:
+    def __misp_add_vt_to_URLObject(self, obj: MISPObject) -> MISPObject | None:
         urls = obj.get_attributes_by_relation('url')
         if not urls:
             return None
@@ -1124,7 +1126,7 @@ class Lookyloo():
         obj.add_reference(vt_obj, 'analysed-with')
         return vt_obj

-    def __misp_add_urlscan_to_event(self, capture_uuid: str, visibility: str) -> Optional[MISPAttribute]:
+    def __misp_add_urlscan_to_event(self, capture_uuid: str, visibility: str) -> MISPAttribute | None:
         if cache := self.capture_cache(capture_uuid):
             response = self.urlscan.url_submit(cache, visibility)
             if 'result' in response:
@@ -1134,7 +1136,7 @@ class Lookyloo():
                 return attribute
         return None

-    def misp_export(self, capture_uuid: str, /, with_parent: bool=False) -> Union[List[MISPEvent], Dict[str, str]]:
+    def misp_export(self, capture_uuid: str, /, with_parent: bool=False) -> list[MISPEvent] | dict[str, str]:
         '''Export a capture in MISP format. You can POST the return of this method
         directly to a MISP instance and it will create an event.'''
         cache = self.capture_cache(capture_uuid)
@@ -1200,7 +1202,7 @@ class Lookyloo():

         return [event]

-    def get_misp_occurrences(self, capture_uuid: str, /, *, instance_name: Optional[str]=None) -> Optional[Tuple[Dict[str, Set[str]], str]]:
+    def get_misp_occurrences(self, capture_uuid: str, /, *, instance_name: str | None=None) -> tuple[dict[str, set[str]], str] | None:
         if instance_name is None:
             misp = self.misps.default_misp
         elif self.misps.get(instance_name) is not None:
@@ -1217,7 +1219,7 @@ class Lookyloo():
             self.logger.warning(f'Unable to get the modules responses unless the tree ({capture_uuid}) is cached.')
             return None
         nodes_to_lookup = ct.root_hartree.rendered_node.get_ancestors() + [ct.root_hartree.rendered_node]
-        to_return: Dict[str, Set[str]] = defaultdict(set)
+        to_return: dict[str, set[str]] = defaultdict(set)
         for node in nodes_to_lookup:
             hits = misp.lookup(node, ct.root_hartree.get_host_node_by_uuid(node.hostnode_uuid))
             for event_id, values in hits.items():
@@ -1226,7 +1228,7 @@ class Lookyloo():
                 to_return[event_id].update(values)
         return to_return, misp.client.root_url

-    def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List[URLNode]]]:
+    def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> dict[str, set[str]] | dict[str, list[URLNode]]:
         """Build (on demand) hashes for all the ressources of the tree, using the alorighm provided by the user.
         If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes an they're build
         with the tree. This method is computing the hashes when you query it, so it is slower."""
@@ -1236,7 +1238,7 @@ class Lookyloo():
             return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
         return hashes

-    def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:
+    def merge_hashlookup_tree(self, tree_uuid: str, /) -> tuple[dict[str, dict[str, Any]], int]:
         if not self.hashlookup.available:
             raise LookylooException('Hashlookup module not enabled.')
         hashes_tree = self.get_hashes_with_context(tree_uuid, algorithm='sha1')
@@ -1253,20 +1255,20 @@ class Lookyloo():
         with hashlookup_file.open() as f:
             hashlookup_entries = json.load(f)

-        to_return: Dict[str, Dict[str, Any]] = defaultdict(dict)
+        to_return: dict[str, dict[str, Any]] = defaultdict(dict)

         for sha1 in hashlookup_entries.keys():
             to_return[sha1]['nodes'] = hashes_tree[sha1]
             to_return[sha1]['hashlookup'] = hashlookup_entries[sha1]
         return to_return, len(hashes_tree)

-    def get_hashes(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
+    def get_hashes(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
         """Return hashes (sha512) of resources.
         Only tree_uuid: All the hashes
         tree_uuid and hostnode_uuid: hashes of all the resources in that hostnode (including embedded ressources)
         tree_uuid, hostnode_uuid, and urlnode_uuid: hash of the URL node body, and embedded resources
         """
-        container: Union[CrawledTree, HostNode, URLNode]
+        container: CrawledTree | HostNode | URLNode
         if urlnode_uuid:
             container = self.get_urlnode_from_tree(tree_uuid, urlnode_uuid)
         elif hostnode_uuid:
@@ -1275,7 +1277,7 @@ class Lookyloo():
             container = self.get_crawled_tree(tree_uuid)
         return get_resources_hashes(container)

-    def get_hostnames(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
+    def get_hostnames(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
         """Return all the unique hostnames:
         * of a complete tree if no hostnode_uuid and urlnode_uuid are given
         * of a HostNode if hostnode_uuid is given
@@ -1291,7 +1293,7 @@ class Lookyloo():
             ct = self.get_crawled_tree(tree_uuid)
             return {node.name for node in ct.root_hartree.hostname_tree.traverse()}

-    def get_urls(self, tree_uuid: str, /, hostnode_uuid: Optional[str]=None, urlnode_uuid: Optional[str]=None) -> Set[str]:
+    def get_urls(self, tree_uuid: str, /, hostnode_uuid: str | None=None, urlnode_uuid: str | None=None) -> set[str]:
         """Return all the unique URLs:
         * of a complete tree if no hostnode_uuid and urlnode_uuid are given
         * of a HostNode if hostnode_uuid is given
@@ -1307,18 +1309,18 @@ class Lookyloo():
             ct = self.get_crawled_tree(tree_uuid)
             return {node.name for node in ct.root_hartree.url_tree.traverse()}

-    def get_playwright_devices(self) -> Dict:
+    def get_playwright_devices(self) -> dict[str, Any]:
         """Get the preconfigured devices from Playwright"""
         return get_devices()

-    def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
+    def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> tuple[HostNode, list[dict[str, Any]]]:
         '''Gather all the informations needed to display the Hostnode investigator popup.'''

-        def normalize_known_content(h: str, /, known_content: Dict[str, Any], url: URLNode) -> Tuple[Optional[Union[str, List[Any]]], Optional[Tuple[bool, Any]]]:
+        def normalize_known_content(h: str, /, known_content: dict[str, Any], url: URLNode) -> tuple[str | list[Any] | None, tuple[bool, Any] | None]:
             ''' There are a few different sources to figure out known vs. legitimate content,
             this method normalize it for the web interface.'''
-            known: Optional[Union[str, List[Any]]] = None
-            legitimate: Optional[Tuple[bool, Any]] = None
+            known: str | list[Any] | None = None
+            legitimate: tuple[bool, Any] | None = None
             if h not in known_content:
                 return known, legitimate

@@ -1340,13 +1342,13 @@ class Lookyloo():
         known_content = self.context.find_known_content(hostnode)
         self.uwhois.query_whois_hostnode(hostnode)

-        urls: List[Dict[str, Any]] = []
+        urls: list[dict[str, Any]] = []
         for url in hostnode.urls:
             # For the popup, we need:
             # * https vs http
             # * everything after the domain
             # * the full URL
-            to_append: Dict[str, Any] = {
+            to_append: dict[str, Any] = {
                 'encrypted': url.name.startswith('https'),
                 'url_path': url.name.split('/', 3)[-1],
                 'url_object': url,
@@ -1389,7 +1391,7 @@ class Lookyloo():

             # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
             if hasattr(url, 'cookies_sent'):
-                to_display_sent: Dict[str, Set[Iterable[Optional[str]]]] = defaultdict(set)
+                to_display_sent: dict[str, set[Iterable[str | None]]] = defaultdict(set)
                 for cookie, contexts in url.cookies_sent.items():
                     if not contexts:
                         # Locally created?
@@ -1401,7 +1403,7 @@ class Lookyloo():

             # Optional: Cookies received from server in response -> map to nodes who send the cookie in request
             if hasattr(url, 'cookies_received'):
-                to_display_received: Dict[str, Dict[str, Set[Iterable[Optional[str]]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)}
+                to_display_received: dict[str, dict[str, set[Iterable[str | None]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)}
                 for domain, c_received, is_3rd_party in url.cookies_received:
                     if c_received not in ct.root_hartree.cookies_sent:
                         # This cookie is never sent.
@@ -1421,14 +1423,14 @@ class Lookyloo():
             urls.append(to_append)
         return hostnode, urls

-    def get_stats(self) -> Dict[str, List]:
+    def get_stats(self) -> dict[str, list[Any]]:
         '''Gather statistics about the lookyloo instance'''
         today = date.today()
         calendar_week = today.isocalendar()[1]

         stats_dict = {'submissions': 0, 'redirects': 0}
-        stats: Dict[int, Dict[int, Dict[str, Any]]] = {}
-        weeks_stats: Dict[int, Dict] = {}
+        stats: dict[int, dict[int, dict[str, Any]]] = {}
+        weeks_stats: dict[int, dict[str, Any]] = {}

         # Only recent captures that are not archived
         for cache in self.sorted_capture_cache():
@@ -1467,7 +1469,7 @@ class Lookyloo():
                 stats[capture_ts.year][capture_ts.month] = {'submissions': 0}
             stats[capture_ts.year][capture_ts.month]['submissions'] += 1

-        statistics: Dict[str, List] = {'weeks': [], 'years': []}
+        statistics: dict[str, list[Any]] = {'weeks': [], 'years': []}
         for week_number in sorted(weeks_stats.keys()):
             week_stat = weeks_stats[week_number]
             urls = week_stat.pop('uniq_urls')
@@ -1477,7 +1479,7 @@ class Lookyloo():
             statistics['weeks'].append(week_stat)

         for year in sorted(stats.keys()):
-            year_stats: Dict[str, Union[int, List]] = {'year': year, 'months': [], 'yearly_submissions': 0}
+            year_stats: dict[str, int | list[Any]] = {'year': year, 'months': [], 'yearly_submissions': 0}
             for month in sorted(stats[year].keys()):
                 month_stats = stats[year][month]
                 if len(month_stats) == 1:
@@ -1496,15 +1498,15 @@ class Lookyloo():
         return statistics

     def store_capture(self, uuid: str, is_public: bool,
-                      os: Optional[str]=None, browser: Optional[str]=None,
-                      parent: Optional[str]=None,
-                      downloaded_filename: Optional[str]=None, downloaded_file: Optional[bytes]=None,
-                      error: Optional[str]=None, har: Optional[Dict[str, Any]]=None,
-                      png: Optional[bytes]=None, html: Optional[str]=None,
-                      last_redirected_url: Optional[str]=None,
-                      cookies: Optional[Union[List['Cookie'], List[Dict[str, str]]]]=None,
-                      capture_settings: Optional[CaptureSettings]=None,
-                      potential_favicons: Optional[Set[bytes]]=None
+                      os: str | None=None, browser: str | None=None,
+                      parent: str | None=None,
+                      downloaded_filename: str | None=None, downloaded_file: bytes | None=None,
+                      error: str | None=None, har: dict[str, Any] | None=None,
+                      png: bytes | None=None, html: str | None=None,
+                      last_redirected_url: str | None=None,
+                      cookies: list[Cookie] | list[dict[str, str]] | None=None,
+                      capture_settings: CaptureSettings | None=None,
+                      potential_favicons: set[bytes] | None=None
                       ) -> None:

         now = datetime.now()
@@ -1512,7 +1514,7 @@ class Lookyloo():
         safe_create_dir(dirpath)

         if os or browser:
-            meta: Dict[str, str] = {}
+            meta: dict[str, str] = {}
             if os:
                 meta['os'] = os
             if browser:
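Note: virtually all of the signature churn in this file follows one pattern. With `from __future__ import annotations` (PEP 563) at the top of a module, annotations stop being evaluated at runtime, so the PEP 604 union syntax (`X | Y`) and PEP 585 builtin generics (`dict[str, Any]`, `set[str]`) become usable even on the older Python versions this codebase still supports. A minimal before/after sketch with a stand-in class:

    from __future__ import annotations  # PEP 563: annotations become plain strings


    class CaptureCache:  # stand-in, only here to keep the sketch self-contained
        pass


    def capture_cache(uuid: str) -> CaptureCache | None:  # was Optional[CaptureCache]
        return None


    def get_hashes(tree_uuid: str) -> set[str]:  # was Set[str]
        return set()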
@@ -14,3 +14,22 @@ from .riskiq import RiskIQ, RiskIQError  # noqa
 from .urlhaus import URLhaus  # noqa
 from .cloudflare import Cloudflare  # noqa
 from .circlpdns import CIRCLPDNS  # noqa
+
+__all__ = [
+    'FOX',
+    'MISPs',
+    'MISP',
+    'PhishingInitiative',
+    'SaneJavaScript',
+    'UrlScan',
+    'UniversalWhois',
+    'VirusTotal',
+    'Pandora',
+    'Phishtank',
+    'Hashlookup',
+    'RiskIQ',
+    'RiskIQError',
+    'URLhaus',
+    'Cloudflare',
+    'CIRCLPDNS'
+]
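Note: the new `__all__` makes the package's re-exports explicit: it is both the surface of `from lookyloo.modules import *` and what mypy's no-implicit-reexport mode (part of `--strict`) consults before accepting names imported in an `__init__.py`. A runnable sketch of the wildcard-import semantics, using a throwaway module with hypothetical names:

    import sys
    import types

    # Build a fake package module so the wildcard import below is self-contained.
    mod = types.ModuleType('fake_modules')
    mod.URLhaus = type('URLhaus', (), {})
    mod.internal_helper = 'not exported'
    mod.__all__ = ['URLhaus']
    sys.modules['fake_modules'] = mod

    ns: dict = {}
    exec('from fake_modules import *', ns)
    assert 'URLhaus' in ns and 'internal_helper' not in ns  # __all__ wins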
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json

 from datetime import date
 from typing import Dict, List, Optional, TYPE_CHECKING
 from urllib.parse import urlparse

-from pypdns import PyPDNS, PDNSRecord
+from pypdns import PyPDNS, PDNSRecord  # type: ignore[attr-defined]

 from ..default import ConfigError, get_homedir
 from ..helpers import get_cache_directory
@@ -32,7 +34,7 @@ class CIRCLPDNS(AbstractModule):
         self.storage_dir_pypdns.mkdir(parents=True, exist_ok=True)
         return True

-    def get_passivedns(self, query: str) -> Optional[List[PDNSRecord]]:
+    def get_passivedns(self, query: str) -> list[PDNSRecord] | None:
         # The query can be IP or Hostname. For now, we only do it on domains.
         url_storage_dir = get_cache_directory(self.storage_dir_pypdns, query, 'pdns')
         if not url_storage_dir.exists():
@@ -44,7 +46,7 @@ class CIRCLPDNS(AbstractModule):
         with cached_entries[0].open() as f:
             return [PDNSRecord(record) for record in json.load(f)]

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3

-from typing import Dict
+from __future__ import annotations
+
+from typing import Dict, Any

 import requests

@@ -29,7 +31,7 @@ class FOX(AbstractModule):

         return True

-    def capture_default_trigger(self, url: str, /, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, url: str, /, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on the initial URL'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -52,7 +54,7 @@ class FOX(AbstractModule):
         response.raise_for_status()
         return True

-    def url_submit(self, url: str) -> Dict:
+    def url_submit(self, url: str) -> dict[str, Any]:
         '''Submit a URL to FOX
         '''
         if not self.available:
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 from typing import Dict, List

-from har2tree import CrawledTree
-from pyhashlookup import Hashlookup
+from har2tree import CrawledTree  # type: ignore[attr-defined]
+from pyhashlookup import Hashlookup  # type: ignore[attr-defined]

 from ..default import ConfigError
 from ..helpers import get_useragent_for_requests
@@ -31,7 +33,7 @@ class HashlookupModule(AbstractModule):
         self.allow_auto_trigger = bool(self.config.get('allow_auto_trigger', False))
         return True

-    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -52,14 +54,14 @@ class HashlookupModule(AbstractModule):

         return {'success': 'Module triggered'}

-    def hashes_lookup(self, hashes: List[str]) -> Dict[str, Dict[str, str]]:
+    def hashes_lookup(self, hashes: list[str]) -> dict[str, dict[str, str]]:
         '''Lookup a list of hashes against Hashlookup
         Note: It will trigger a request to hashlookup every time *until* there is a hit, then once a day.
         '''
         if not self.available:
             raise ConfigError('Hashlookup not available, probably not enabled.')

-        to_return: Dict[str, Dict[str, str]] = {}
+        to_return: dict[str, dict[str, str]] = {}
         for entry in self.client.sha1_bulk_lookup(hashes):
             if 'SHA-1' in entry:
                 to_return[entry['SHA-1'].lower()] = entry
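Note: `sha1_bulk_lookup()` is the one network call in the loop above; it takes a list of hashes and returns one dict per query, and, as the `if 'SHA-1' in entry` guard suggests, entries without a `'SHA-1'` key are treated as misses. A short sketch (the hash value is just the SHA-1 of the empty string, used as a placeholder):

    from pyhashlookup import Hashlookup  # defaults to CIRCL's public instance

    client = Hashlookup()
    entries = client.sha1_bulk_lookup(['da39a3ee5e6b4b0d3255bfef95601890afd80709'])
    # Keep only the hits, keyed by lowercase SHA-1, like hashes_lookup() does.
    known = {e['SHA-1'].lower(): e for e in entries if 'SHA-1' in e}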
@@ -5,12 +5,12 @@ import re
 from io import BytesIO
 from collections import defaultdict
 from collections.abc import Mapping
-from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Set, Union, TYPE_CHECKING, Iterator

 import requests
-from har2tree import HostNode, URLNode, Har2TreeError
-from pymisp import MISPAttribute, MISPEvent, PyMISP
-from pymisp.tools import FileObject, URLObject
+from har2tree import HostNode, URLNode, Har2TreeError  # type: ignore[attr-defined]
+from pymisp import MISPAttribute, MISPEvent, PyMISP, MISPTag  # type: ignore[attr-defined]
+from pymisp.tools import FileObject, URLObject  # type: ignore[attr-defined]

 from ..default import get_config, get_homedir
 from ..helpers import get_public_suffix_list
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
     from ..capturecache import CaptureCache


-class MISPs(Mapping, AbstractModule):
+class MISPs(Mapping, AbstractModule):  # type: ignore[type-arg]

     def module_init(self) -> bool:
         if not self.config.get('default'):
@@ -37,7 +37,7 @@ class MISPs(Mapping, AbstractModule):
             self.logger.warning(f"The default MISP instance ({self.default_instance}) is missing in the instances ({', '.join(self.config['instances'].keys())}), disabling MISP.")
             return False

-        self.__misps: Dict[str, 'MISP'] = {}
+        self.__misps = {}
         for instance_name, instance_config in self.config['instances'].items():
             if misp_connector := MISP(config=instance_config):
                 if misp_connector.available:
@@ -56,10 +56,10 @@ class MISPs(Mapping, AbstractModule):
     def __getitem__(self, name: str) -> 'MISP':
         return self.__misps[name]

-    def __iter__(self):
+    def __iter__(self) -> Iterator[dict[str, 'MISP']]:
         return iter(self.__misps)

-    def __len__(self):
+    def __len__(self) -> int:
         return len(self.__misps)

     @property
@@ -170,10 +170,10 @@ class MISP(AbstractModule):
         self.psl = get_public_suffix_list()
         return True

-    def get_fav_tags(self):
+    def get_fav_tags(self) -> dict[Any, Any] | list[MISPTag]:
         return self.client.tags(pythonify=True, favouritesOnly=1)

-    def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=False) -> Union[List[MISPEvent], Dict]:
+    def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=False) -> Union[List[MISPEvent], Dict[str, str]]:
         '''Adds the pre-configured information as required by the instance.
         If duplicates aren't allowed, they will be automatically skiped and the
         extends_uuid key in the next element in the list updated'''
@@ -196,11 +196,11 @@ class MISP(AbstractModule):
             for tag in self.default_tags:
                 event.add_tag(tag)
             if auto_publish:
-                event.publish()
+                event.publish()  # type: ignore[no-untyped-call]
             events_to_push.append(event)
         return events_to_push

-    def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=None) -> Union[List[MISPEvent], Dict]:
+    def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool=False, auto_publish: Optional[bool]=None) -> Union[List[MISPEvent], Dict[Any, Any]]:
         if auto_publish is None:
             auto_publish = self.auto_publish
         if self.available and self.enable_push:
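Note: `MISPs` exposes its dict of configured MISP connectors read-only through the `collections.abc.Mapping` protocol: implement `__getitem__`, `__iter__`, and `__len__`, and the ABC supplies `.get()`, `.items()`, `in`, and the rest. A standalone sketch with a hypothetical `Registry` class (a `Mapping` iterates over its keys, hence `Iterator[str]` here):

    from __future__ import annotations

    from collections.abc import Iterator, Mapping


    class Registry(Mapping):
        def __init__(self) -> None:
            self._backends: dict[str, object] = {'default': object()}

        def __getitem__(self, name: str) -> object:
            return self._backends[name]

        def __iter__(self) -> Iterator[str]:
            return iter(self._backends)

        def __len__(self) -> int:
            return len(self._backends)


    assert 'default' in Registry()  # __contains__ comes for free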
@@ -1,9 +1,11 @@
 #!/usr/bin/env python3

-from io import BytesIO
-from typing import Dict
+from __future__ import annotations

-from pypandora import PyPandora
+from io import BytesIO
+from typing import Dict, Any
+
+from pypandora import PyPandora  # type: ignore[attr-defined]

 from ..default import ConfigError
 from ..helpers import get_useragent_for_requests
@@ -27,7 +29,7 @@ class Pandora(AbstractModule):

         return True

-    def capture_default_trigger(self, file_in_memory: BytesIO, filename: str, /, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, file_in_memory: BytesIO, filename: str, /, auto_trigger: bool=False) -> dict[str, str]:
         '''Automatically submit the file if the landing URL is a file instead of a webpage'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -39,7 +41,7 @@ class Pandora(AbstractModule):
         self.submit_file(file_in_memory, filename)
         return {'success': 'Module triggered'}

-    def submit_file(self, file_in_memory: BytesIO, filename: str) -> Dict:
+    def submit_file(self, file_in_memory: BytesIO, filename: str) -> dict[str, Any]:
         '''Submit a file to Pandora'''
         if not self.available:
             raise ConfigError('Pandora not available, probably not able to reach the server.')
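Note: the import shuffle above exists because a `__future__` import is only legal before any other statement; only the module docstring and comments may precede it. A minimal layout sketch:

    """Module docstring: the only statement allowed above a __future__ import."""
    from __future__ import annotations

    from io import BytesIO  # every other import comes after


    def read(buf: BytesIO | None = None) -> bytes:  # PEP 604 union, fine on 3.8
        return buf.getvalue() if buf else b''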
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json

 from datetime import date, datetime, timedelta, timezone
 from typing import Any, Dict, Optional, List, TYPE_CHECKING

-from pyphishtanklookup import PhishtankLookup
+from pyphishtanklookup import PhishtankLookup  # type: ignore[attr-defined]

 from ..default import ConfigError, get_homedir
 from ..helpers import get_cache_directory
@@ -38,7 +40,7 @@ class Phishtank(AbstractModule):
         self.storage_dir_pt.mkdir(parents=True, exist_ok=True)
         return True

-    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
+    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
         url_storage_dir = get_cache_directory(self.storage_dir_pt, url, 'url')
         if not url_storage_dir.exists():
             return None
@@ -49,10 +51,10 @@ class Phishtank(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def lookup_ips_capture(self, cache: 'CaptureCache') -> Dict[str, List[Dict[str, Any]]]:
+    def lookup_ips_capture(self, cache: CaptureCache) -> dict[str, list[dict[str, Any]]]:
         with (cache.capture_dir / 'ips.json').open() as f:
             ips_dump = json.load(f)
-        to_return: Dict[str, List[Dict[str, Any]]] = {}
+        to_return: dict[str, list[dict[str, Any]]] = {}
         for ip in {ip for ips_list in ips_dump.values() for ip in ips_list}:
             entry = self.get_ip_lookup(ip)
             if not entry:
@@ -64,7 +66,7 @@ class Phishtank(AbstractModule):
             to_return[ip].append(entry)
         return to_return

-    def get_ip_lookup(self, ip: str) -> Optional[Dict[str, Any]]:
+    def get_ip_lookup(self, ip: str) -> dict[str, Any] | None:
         ip_storage_dir = get_cache_directory(self.storage_dir_pt, ip, 'ip')
         if not ip_storage_dir.exists():
             return None
@@ -75,7 +77,7 @@ class Phishtank(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 import time

 from datetime import date
 from typing import Any, Dict, Optional, TYPE_CHECKING

-from pyeupi import PyEUPI
+from pyeupi import PyEUPI  # type: ignore[attr-defined]

 from ..default import ConfigError, get_homedir
 from ..helpers import get_cache_directory
@@ -34,7 +36,7 @@ class PhishingInitiative(AbstractModule):
         self.storage_dir_eupi.mkdir(parents=True, exist_ok=True)
         return True

-    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
+    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
         url_storage_dir = get_cache_directory(self.storage_dir_eupi, url)
         if not url_storage_dir.exists():
             return None
@@ -45,7 +47,7 @@ class PhishingInitiative(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
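The change repeated across these modules rests on `from __future__ import annotations`: annotations become strings that are never evaluated at runtime, so PEP 604 unions (`X | None`) and builtin generics (`dict[str, Any]`) type-check cleanly even on Python 3.8, which this project still supports (see the `python-versions` constraint in the lock file further down). A minimal sketch:

```python
from __future__ import annotations  # must precede all other imports

from typing import Any


def lookup(url: str) -> dict[str, Any] | None:
    # Without the future import, `dict[str, Any] | None` would raise a
    # TypeError at function-definition time on Python 3.8/3.9.
    return {'url': url} if url.startswith('http') else None


print(lookup('https://example.com'))  # {'url': 'https://example.com'}
print(lookup('not-a-url'))            # None
```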
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json

 from datetime import date, datetime, timedelta
@@ -56,7 +58,7 @@ class RiskIQ(AbstractModule):
         self.storage_dir_riskiq.mkdir(parents=True, exist_ok=True)
         return True

-    def get_passivedns(self, query: str) -> Optional[Dict[str, Any]]:
+    def get_passivedns(self, query: str) -> dict[str, Any] | None:
         # The query can be IP or Hostname. For now, we only do it on domains.
         url_storage_dir = get_cache_directory(self.storage_dir_riskiq, query, 'pdns')
         if not url_storage_dir.exists():
@@ -68,7 +70,7 @@ class RiskIQ(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -88,7 +90,7 @@ class RiskIQ(AbstractModule):
         self.pdns_lookup(hostname, force)
         return {'success': 'Module triggered'}

-    def pdns_lookup(self, hostname: str, force: bool=False, first_seen: Optional[Union[date, datetime]]=None) -> None:
+    def pdns_lookup(self, hostname: str, force: bool=False, first_seen: date | datetime | None=None) -> None:
         '''Lookup an hostname on RiskIQ Passive DNS
         Note: force means re-fetch the entry RiskIQ even if we already did it today
         '''
@@ -1,10 +1,12 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 from datetime import date
 from typing import Dict, Iterable, List, Union

-from pysanejs import SaneJS
+from pysanejs import SaneJS  # type: ignore[attr-defined]

 from ..default import get_homedir

@@ -29,7 +31,7 @@ class SaneJavaScript(AbstractModule):
         self.storage_dir.mkdir(parents=True, exist_ok=True)
         return True

-    def hashes_lookup(self, sha512: Union[Iterable[str], str], force: bool=False) -> Dict[str, List[str]]:
+    def hashes_lookup(self, sha512: Iterable[str] | str, force: bool=False) -> dict[str, list[str]]:
         if isinstance(sha512, str):
             hashes: Iterable[str] = [sha512]
         else:
@@ -43,7 +45,7 @@ class SaneJavaScript(AbstractModule):
         with sanejs_unknowns.open() as f:
             unknown_hashes = {line.strip() for line in f.readlines()}

-        to_return: Dict[str, List[str]] = {}
+        to_return: dict[str, list[str]] = {}

         if force:
             to_lookup = hashes
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 from datetime import date
 from typing import Any, Dict, Optional, TYPE_CHECKING
@@ -29,7 +31,7 @@ class URLhaus(AbstractModule):
         self.storage_dir_uh.mkdir(parents=True, exist_ok=True)
         return True

-    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
+    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
         url_storage_dir = get_cache_directory(self.storage_dir_uh, url, 'url')
         if not url_storage_dir.exists():
             return None
@@ -40,13 +42,13 @@ class URLhaus(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def __url_result(self, url: str) -> Dict:
+    def __url_result(self, url: str) -> dict[str, Any]:
         data = {'url': url}
         response = requests.post(f'{self.url}/url/', data)
         response.raise_for_status()
         return response.json()

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 from datetime import date
 from typing import Any, Dict, Optional, TYPE_CHECKING
@@ -47,7 +49,7 @@ class UrlScan(AbstractModule):
         self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True)
         return True

-    def get_url_submission(self, capture_info: 'CaptureCache') -> Dict[str, Any]:
+    def get_url_submission(self, capture_info: CaptureCache) -> dict[str, Any]:
         url_storage_dir = get_cache_directory(
             self.storage_dir_urlscan,
             f'{capture_info.url}{capture_info.user_agent}{capture_info.referer}',
@@ -61,7 +63,7 @@ class UrlScan(AbstractModule):
         with cached_entries[0].open() as f:
             return json.load(f)

-    def capture_default_trigger(self, capture_info: 'CaptureCache', /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, capture_info: CaptureCache, /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on the initial URL'''
         if not self.available:
             return {'error': 'Module not available'}
@@ -75,7 +77,7 @@ class UrlScan(AbstractModule):
         self.url_submit(capture_info, visibility, force)
         return {'success': 'Module triggered'}

-    def __submit_url(self, url: str, useragent: Optional[str], referer: Optional[str], visibility: str) -> Dict:
+    def __submit_url(self, url: str, useragent: str | None, referer: str | None, visibility: str) -> dict[str, Any]:
         data = {'customagent': useragent if useragent else '', 'referer': referer if referer else ''}

         if not url.startswith('http'):
@@ -96,12 +98,12 @@ class UrlScan(AbstractModule):
         response.raise_for_status()
         return response.json()

-    def __url_result(self, uuid: str) -> Dict:
+    def __url_result(self, uuid: str) -> dict[str, Any]:
         response = self.client.get(f'https://urlscan.io/api/v1/result/{uuid}')
         response.raise_for_status()
         return response.json()

-    def url_submit(self, capture_info: 'CaptureCache', visibility: str, force: bool=False) -> Dict:
+    def url_submit(self, capture_info: CaptureCache, visibility: str, force: bool=False) -> dict[str, Any]:
         '''Lookup an URL on urlscan.io
         Note: force means 2 things:
         * (re)scan of the URL
@@ -142,7 +144,7 @@ class UrlScan(AbstractModule):
             return response
         return {'error': 'Submitting is not allowed by the configuration'}

-    def url_result(self, capture_info: 'CaptureCache'):
+    def url_result(self, capture_info: CaptureCache) -> dict[str, Any]:
         '''Get the result from a submission.'''
         submission = self.get_url_submission(capture_info)
         if submission and 'uuid' in submission:
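The `capture_default_trigger` signatures above also lean on the `/` and `*` argument markers: parameters before `/` are positional-only, parameters after `*` are keyword-only. A small illustrative sketch (the function and its names are hypothetical, not from the codebase):

```python
def trigger(cache: str, /, visibility: str, *, force: bool = False) -> dict[str, str]:
    # `cache` is positional-only (before `/`); `force` is keyword-only
    # (after `*`); `visibility` may be passed either way.
    return {'cache': cache, 'visibility': visibility, 'force': str(force)}


trigger('uuid-1234', 'public', force=True)   # OK
trigger('uuid-1234', visibility='unlisted')  # OK
# trigger(cache='uuid-1234', visibility='public')  -> TypeError (positional-only)
# trigger('uuid-1234', 'public', True)             -> TypeError (keyword-only)
```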
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import re
 import socket

 from typing import overload, Literal, List, Union

-from har2tree import CrawledTree, Har2TreeError, HostNode
+from har2tree import CrawledTree, Har2TreeError, HostNode  # type: ignore[attr-defined]

 from .abstractmodule import AbstractModule

@@ -62,7 +64,7 @@ class UniversalWhois(AbstractModule):
             self.query_whois_hostnode(n)

     @overload
-    def whois(self, query: str, contact_email_only: Literal[True]) -> List[str]:
+    def whois(self, query: str, contact_email_only: Literal[True]) -> list[str]:
         ...

     @overload
@@ -70,10 +72,10 @@ class UniversalWhois(AbstractModule):
         ...

     @overload
-    def whois(self, query: str, contact_email_only: bool=False) -> Union[str, List[str]]:
+    def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
         ...

-    def whois(self, query: str, contact_email_only: bool=False) -> Union[str, List[str]]:
+    def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
         if not self.available:
             return ''
         bytes_whois = b''
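The trio of `@overload` declarations lets the checker narrow the return type of `whois()` from the `contact_email_only` flag instead of always reporting the full union. A self-contained sketch of the same technique, with placeholder data:

```python
from __future__ import annotations

from typing import Literal, overload


@overload
def whois(query: str, contact_email_only: Literal[True]) -> list[str]: ...
@overload
def whois(query: str, contact_email_only: Literal[False] = False) -> str: ...
@overload
def whois(query: str, contact_email_only: bool = False) -> str | list[str]: ...


def whois(query: str, contact_email_only: bool = False) -> str | list[str]:
    # Single runtime implementation; the overloads above are erased at
    # runtime and only steer the type checker.
    if contact_email_only:
        return ['abuse@example.com']  # placeholder data
    return f'whois record for {query}'


emails = whois('example.com', True)  # checker infers list[str]
record = whois('example.com')        # checker infers str
```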
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import json
 import time
 from datetime import date
@@ -18,9 +20,10 @@ if TYPE_CHECKING:
 from .abstractmodule import AbstractModule


-def jsonify_vt(obj: WhistleBlowerDict):
+def jsonify_vt(obj: WhistleBlowerDict) -> dict[str, Any]:
     if isinstance(obj, WhistleBlowerDict):
         return {k: v for k, v in obj.items()}
+    return obj


 class VirusTotal(AbstractModule):
@@ -39,7 +42,7 @@ class VirusTotal(AbstractModule):
         self.storage_dir_vt.mkdir(parents=True, exist_ok=True)
         return True

-    def get_url_lookup(self, url: str) -> Optional[Dict[str, Any]]:
+    def get_url_lookup(self, url: str) -> dict[str, Any] | None:
         url_storage_dir = get_cache_directory(self.storage_dir_vt, vt.url_id(url))
         if not url_storage_dir.exists():
             return None
@@ -54,7 +57,7 @@ class VirusTotal(AbstractModule):
             cached_entries[0].unlink(missing_ok=True)
             return None

-    def capture_default_trigger(self, cache: 'CaptureCache', /, *, force: bool=False, auto_trigger: bool=False) -> Dict:
+    def capture_default_trigger(self, cache: CaptureCache, /, *, force: bool=False, auto_trigger: bool=False) -> dict[str, str]:
         '''Run the module on all the nodes up to the final redirect'''
         if not self.available:
             return {'error': 'Module not available'}
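`jsonify_vt` is the kind of hook `json.dumps(..., default=...)` expects: it is called for any object the encoder cannot serialize itself and must hand back something serializable, hence the explicit `return obj` fallthrough added above. A sketch under assumptions, where `WhistleBlower` is a hypothetical stand-in for vt's `WhistleBlowerDict`:

```python
from __future__ import annotations

import json

from typing import Any


class WhistleBlower:
    # Hypothetical stand-in for vt's WhistleBlowerDict, which json cannot
    # serialize natively.
    def __init__(self, **kwargs: Any) -> None:
        self._data = kwargs

    def items(self) -> Any:
        return self._data.items()


def jsonify_vt_like(obj: Any) -> dict[str, Any]:
    # json.dumps() calls this hook for any object it cannot serialize itself.
    if isinstance(obj, WhistleBlower):
        return {k: v for k, v in obj.items()}
    return obj  # mirrors the diff; in practice the hook only receives this type


print(json.dumps({'report': WhistleBlower(positives=0)}, default=jsonify_vt_like))
# {"report": {"positives": 0}}
```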
@@ -0,0 +1,8 @@
+[mypy]
+strict = True
+warn_return_any = False
+show_error_context = True
+pretty = True
+
+[mypy-docs.source.*]
+ignore_errors = True
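The new ini-style configuration turns on `strict = True` globally, relaxes only `warn_return_any`, and exempts the docs tree. Roughly what that buys, sketched in Python with illustrative functions that are not from the codebase:

```python
from __future__ import annotations

from typing import Any


def fetch(url):  # rejected under strict mode: missing type annotations
    return {'url': url}


def parse(raw: Any) -> dict[str, Any]:
    # mypy --strict would normally warn about returning a value typed Any;
    # warn_return_any = False keeps this pattern legal, which matches the
    # modules above returning response.json() directly.
    return raw
```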
@@ -1447,18 +1447,18 @@ referencing = ">=0.31.0"

 [[package]]
 name = "lacuscore"
-version = "1.7.8"
+version = "1.7.9"
 description = "Core of Lacus, usable as a module"
 optional = false
 python-versions = ">=3.8,<4.0"
 files = [
-    {file = "lacuscore-1.7.8-py3-none-any.whl", hash = "sha256:b877567a7efb35802c5fb6a01a8b88602978c16b49ee0ceead937337c6710081"},
-    {file = "lacuscore-1.7.8.tar.gz", hash = "sha256:e0aa938a6555c8fe8485777e04c2ca549cd3b1fd7a75e7839d49a3fef1499252"},
+    {file = "lacuscore-1.7.9-py3-none-any.whl", hash = "sha256:74309aa4216fabffadd4ab724f8f2273d12e59dedd8e826e2710847d92497f8c"},
+    {file = "lacuscore-1.7.9.tar.gz", hash = "sha256:cb0df82d88ffe805fc78c60e535ee54d82842b763a84ad97cfc2a5a99d4c3ed7"},
 ]

 [package.dependencies]
 defang = ">=0.5.3,<0.6.0"
-playwrightcapture = {version = ">=1.22.5,<2.0.0", extras = ["recaptcha"]}
+playwrightcapture = {version = ">=1.22.6,<2.0.0", extras = ["recaptcha"]}
 redis = {version = ">=5.0.1,<6.0.0", extras = ["hiredis"]}
 requests = ">=2.31.0,<3.0.0"
 ua-parser = ">=0.18.0,<0.19.0"
@@ -2154,13 +2154,13 @@ test = ["pytest"]

 [[package]]
 name = "playwrightcapture"
-version = "1.22.5"
+version = "1.22.6"
 description = "A simple library to capture websites using playwright"
 optional = false
 python-versions = ">=3.8,<4.0"
 files = [
-    {file = "playwrightcapture-1.22.5-py3-none-any.whl", hash = "sha256:023d394efe2c6173178ac7a9143a9b77400704b965280c494e9bb418eaa2ea86"},
-    {file = "playwrightcapture-1.22.5.tar.gz", hash = "sha256:8fac3bf723536ebc6ff0e1908aa838029a8b6e8ed1998fd162d5557d1d3fb2ec"},
+    {file = "playwrightcapture-1.22.6-py3-none-any.whl", hash = "sha256:910ad4dabbc51864f1c8fed6e62c2869a519211bcf7ae6e9c5aac3ea29268e33"},
+    {file = "playwrightcapture-1.22.6.tar.gz", hash = "sha256:b5c377585aba9ff71f055127b6be86458503ff3308e8fc8225dd4c05ab9597ae"},
 ]

 [package.dependencies]
@@ -2173,7 +2173,7 @@ pytz = {version = ">=2023.3.post1,<2024.0", markers = "python_version < \"3.9\""
 requests = {version = ">=2.31.0,<3.0.0", extras = ["socks"], optional = true, markers = "extra == \"recaptcha\""}
 setuptools = ">=69.0.3,<70.0.0"
 SpeechRecognition = {version = ">=3.10.1,<4.0.0", optional = true, markers = "extra == \"recaptcha\""}
-tzdata = ">=2023.3,<2024.0"
+tzdata = ">=2023.4,<2024.0"
 w3lib = ">=2.1.2,<3.0.0"

 [package.extras]
@@ -3592,4 +3592,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<3.12"
-content-hash = "9e6afc44fccf8789e1968b698fc9a6632bfb7fb5d053a404356000386d1fd3ad"
+content-hash = "95ea92c4f809ea280840866efc4385f75bbb4c7ace7cb9ac4979c17df722fd02"
@@ -65,7 +65,7 @@ passivetotal = "^2.5.9"
 werkzeug = "^3.0.1"
 filetype = "^1.2.0"
 pypandora = "^1.6.1"
-lacuscore = "^1.7.8"
+lacuscore = "^1.7.9"
 pylacus = "^1.7.1"
 pyipasnhistory = "^2.1.2"
 publicsuffixlist = "^0.10.0.20231214"
@@ -103,17 +103,3 @@ types-pytz = "^2023.3.1.1"
 [build-system]
 requires = ["poetry_core"]
 build-backend = "poetry.core.masonry.api"
-
-[tool.mypy]
-check_untyped_defs = true
-ignore_errors = false
-ignore_missing_imports = false
-strict_optional = true
-no_implicit_optional = true
-warn_unused_ignores = true
-warn_redundant_casts = true
-warn_unused_configs = true
-warn_unreachable = true
-
-show_error_context = true
-pretty = true
@@ -9,7 +9,7 @@ from lookyloo.default import safe_create_dir, get_socket_path
 from lookyloo.helpers import get_captures_dir


-def rename_captures():
+def rename_captures() -> None:
     r = Redis(unix_socket_path=get_socket_path('cache'))
     capture_dir: Path = get_captures_dir()
     for uuid_path in capture_dir.glob('*/uuid'):
@@ -9,7 +9,7 @@ import s3fs  # type: ignore
 from lookyloo.default import get_config


-def check_path(path: str):
+def check_path(path: str) -> dict[str, str]:
     s3fs_config = get_config('generic', 's3fs')
     s3fs_client = s3fs.S3FileSystem(key=s3fs_config['config']['key'],
                                     secret=s3fs_config['config']['secret'],
@@ -4,14 +4,14 @@ import base64
 import hashlib
 import json

-from typing import Dict
+from typing import Dict, Any

 from lookyloo.default import get_homedir

 if __name__ == '__main__':
     dest_dir = get_homedir() / 'website' / 'web'

-    to_save: Dict = {'static': {}}
+    to_save: Dict[str, Any] = {'static': {}}

     for resource in (dest_dir / 'static').glob('*'):
         if resource.name[0] == '.':
@@ -73,7 +73,7 @@ def ua_parser(html_content: str) -> Dict[str, Any]:
     return to_store


-def main():
+def main() -> None:
     to_parse = Path('Most Common User Agents - Tech Blog (wh).html')

     today = datetime.now()
@@ -1,9 +1,11 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import os
 import sys

-from typing import List, Tuple
+from typing import List, Tuple, Any

 from redis import Redis
 from redis.exceptions import ConnectionError
@@ -21,11 +23,11 @@ console = Console(color_system="256")
 class Monitoring():

     def __init__(self) -> None:
-        self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
-        self.redis_indexing: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)
+        self.redis_cache: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)  # type: ignore[type-arg]
+        self.redis_indexing: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)  # type: ignore[type-arg]

     @property
-    def backend_status(self):
+    def backend_status(self) -> bool:
         socket_path_cache = get_socket_path('cache')
         socket_path_index = get_socket_path('indexing')
         backend_up = True
@@ -56,12 +58,12 @@ class Monitoring():
         return backend_up

     @property
-    def queues(self):
+    def queues(self) -> list[tuple[str, float]]:
         return self.redis_cache.zrevrangebyscore('queues', 'Inf', '-Inf', withscores=True)

     @property
-    def ongoing_captures(self):
-        captures_uuid: List[Tuple[str, float]] = self.redis_cache.zrevrangebyscore('to_capture', 'Inf', '-Inf', withscores=True)
+    def ongoing_captures(self) -> list[tuple[str, float, dict[str, Any]]]:
+        captures_uuid: list[tuple[str, float]] = self.redis_cache.zrevrangebyscore('to_capture', 'Inf', '-Inf', withscores=True)
         if not captures_uuid:
             return []
         to_return = []
@@ -75,7 +77,7 @@ class Monitoring():
         return to_return

     @property
-    def tree_cache(self):
+    def tree_cache(self) -> dict[str, str]:
         to_return = {}
         for pid_name, value in self.redis_cache.hgetall('tree_cache').items():
             pid, name = pid_name.split('|', 1)
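The `# type: ignore[type-arg]` comments exist because, with the types-redis stubs, `Redis` is generic over the response type, while subscripting the class at runtime is not guaranteed on the redis-py versions in use. One hedged alternative, an assumption rather than what this commit does, is to parametrise for the checker only:

```python
from __future__ import annotations

from typing import TYPE_CHECKING

from redis import Redis

if TYPE_CHECKING:
    # Only the type checker evaluates this branch, so runtime subscript
    # support on Redis does not matter here.
    CacheRedis = Redis[str]
else:
    CacheRedis = Redis


def connect(socket_path: str) -> CacheRedis:
    # decode_responses=True is what makes the str parameter accurate.
    return Redis(unix_socket_path=socket_path, decode_responses=True)
```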
@@ -4,14 +4,14 @@ import csv
 import argparse
 import logging

-from lookyloo.lookyloo import Indexing, Lookyloo
+from lookyloo import Indexing, Lookyloo
 from lookyloo.helpers import get_captures_dir

 logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                     level=logging.INFO)


-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(description='Rebuild the redis cache.')
     parser.add_argument('--rebuild_pickles', default=False, action='store_true', help='Delete and rebuild the pickles. Count 20s/pickle, it can take a very long time.')
     args = parser.parse_args()
@@ -30,7 +30,7 @@ def main():
     with index.open('r') as _f:
         recent_uuids = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
     if recent_uuids:
-        lookyloo.redis.hset('lookup_dirs', mapping=recent_uuids)
+        lookyloo.redis.hset('lookup_dirs', mapping=recent_uuids)  # type: ignore[arg-type]

     # This call will rebuild all the caches as needed.
     lookyloo.sorted_capture_cache()
@@ -1,8 +1,8 @@
-from lookyloo.lookyloo import Lookyloo
+from lookyloo import Lookyloo

 import calendar
 import datetime
 from urllib.parse import urlparse
-from typing import Dict, Any, Union, Set
+from typing import Dict, Any, Union, Set, List

 lookyloo = Lookyloo()
@@ -15,11 +15,12 @@ weeks_stats: Dict[int, Dict[str, Union[int, Set[str]]]] = \
     calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}}


-def uniq_domains(uniq_urls):
+def uniq_domains(uniq_urls: List[str]) -> Set[str]:
     domains = set()
     for url in uniq_urls:
         splitted = urlparse(url)
-        domains.add(splitted.hostname)
+        if splitted.hostname:
+            domains.add(splitted.hostname)
     return domains


@@ -50,8 +51,8 @@ for week_number, week_stat in weeks_stats.items():
     print(' Number of analysis with redirects:', week_stat['analysis_with_redirects'])
     print(' Number of redirects:', week_stat['redirects'])
     print(' Number of unique URLs:', len(week_stat['uniq_urls']))  # type: ignore
-    domains = uniq_domains(week_stat['uniq_urls'])
-    print(' Number of unique domains:', len(domains))
+    d = uniq_domains(week_stat['uniq_urls'])  # type: ignore[arg-type]
+    print(' Number of unique domains:', len(d))


 for year, data in stats.items():
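The guard added in `uniq_domains` matters because `urlparse(...).hostname` is typed `str | None`, and really is `None` for any string without a network location; without the check, strict mypy rejects the `add` and a `None` could land in the set. A two-line demonstration:

```python
from urllib.parse import urlparse

print(urlparse('https://example.com/page').hostname)    # example.com
print(urlparse('mailto:someone@example.com').hostname)  # None
```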
@@ -7,7 +7,7 @@ import argparse
 from lookyloo.default import get_homedir


-def validate_generic_config_file():
+def validate_generic_config_file() -> bool:
     sample_config = get_homedir() / 'config' / 'generic.json.sample'
     with sample_config.open() as f:
         generic_config_sample = json.load(f)
@@ -53,7 +53,7 @@ def validate_generic_config_file():
     return True


-def validate_modules_config_file():
+def validate_modules_config_file() -> bool:
     with (get_homedir() / 'config' / 'modules.json').open() as f:
         modules_config = json.load(f)
     with (get_homedir() / 'config' / 'modules.json.sample').open() as f:
@@ -69,7 +69,7 @@ def validate_modules_config_file():
     return True


-def update_user_configs():
+def update_user_configs() -> bool:
     for file_name in ['generic', 'modules']:
         with (get_homedir() / 'config' / f'{file_name}.json').open() as f:
             try:
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import base64
 import calendar
 import functools
@@ -22,14 +24,15 @@ from uuid import uuid4
 from zipfile import ZipFile

 import flask_login  # type: ignore
-from flask import (Flask, Response, flash, jsonify, redirect, render_template,
+from flask import (Flask, Response, Request, flash, jsonify, redirect, render_template,
                    request, send_file, url_for)
 from flask_bootstrap import Bootstrap5  # type: ignore
 from flask_cors import CORS  # type: ignore
 from flask_restx import Api  # type: ignore
 from lacuscore import CaptureStatus
-from pymisp import MISPEvent, MISPServerError
+from pymisp import MISPEvent, MISPServerError  # type: ignore[attr-defined]
 from werkzeug.security import check_password_hash
+from werkzeug.wrappers.response import Response as WerkzeugResponse

 from lookyloo.default import get_config
 from lookyloo.exceptions import MissingUUID, NoValidHarFile
@@ -71,8 +74,8 @@ login_manager.init_app(app)
 user_agents = UserAgents()


-@login_manager.user_loader
-def user_loader(username):
+@login_manager.user_loader  # type: ignore[misc]
+def user_loader(username: str) -> User | None:
     if username not in build_users_table():
         return None
     user = User()
@@ -80,13 +83,13 @@ def user_loader(username):
     return user


-@login_manager.request_loader
-def _load_user_from_request(request):
+@login_manager.request_loader  # type: ignore[misc]
+def _load_user_from_request(request: Request) -> User | None:
     return load_user_from_request(request)


 @app.route('/login', methods=['GET', 'POST'])
-def login():
+def login() -> WerkzeugResponse | str | Response:
     if request.method == 'GET':
         return '''
               <form action='login' method='POST'>
@@ -110,8 +113,8 @@ def login():


 @app.route('/logout')
-@flask_login.login_required
-def logout():
+@flask_login.login_required  # type: ignore[misc]
+def logout() -> WerkzeugResponse:
     flask_login.logout_user()
     flash('Successfully logged out.', 'success')
     return redirect(url_for('index'))
@@ -141,7 +144,7 @@ hide_captures_with_error = get_config('generic', 'hide_captures_with_error')

 # Method to make sizes in bytes human readable
 # Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
-def sizeof_fmt(num, suffix='B'):
+def sizeof_fmt(num: float, suffix: str='B') -> str:
     for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
         if abs(num) < 1024.0:
             return f"{num:3.1f}{unit}{suffix}"
@@ -152,7 +155,7 @@ def sizeof_fmt(num, suffix='B'):
 app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)


-def http_status_description(code: int):
+def http_status_description(code: int) -> str:
     if code in http.client.responses:
         return http.client.responses[code]
     return f'Invalid code: {code}'
@@ -161,7 +164,7 @@ def http_status_description(code: int):
 app.jinja_env.globals.update(http_status_description=http_status_description)


-def month_name(month: int):
+def month_name(month: int) -> str:
     return calendar.month_name[month]


@@ -181,8 +184,8 @@ class Icon(TypedDict):
     tooltip: str


-def get_icon(icon_id: str) -> Optional[Icon]:
-    available_icons: Dict[str, Icon] = {
+def get_icon(icon_id: str) -> Icon | None:
+    available_icons: dict[str, Icon] = {
         'js': {'icon': "javascript.png", 'tooltip': 'The content of the response is a javascript'},
         'exe': {'icon': "exe.png", 'tooltip': 'The content of the response is an executable'},
         'css': {'icon': "css.png", 'tooltip': 'The content of the response is a CSS'},
@@ -208,7 +211,7 @@ def get_icon(icon_id: str) -> Optional[Icon]:
 app.jinja_env.globals.update(get_icon=get_icon)


-def get_tz_info() -> Tuple[Optional[str], str, Set[str]]:
+def get_tz_info() -> tuple[str | None, str, set[str]]:
     now = datetime.now().astimezone()
     local_TZ = now.tzname()
     local_UTC_offset = f'UTC{now.strftime("%z")}'
@@ -221,7 +224,7 @@ app.jinja_env.globals.update(tz_info=get_tz_info)
 # ##### Generic/configuration methods #####

 @app.after_request
-def after_request(response):
+def after_request(response: Response) -> Response:
     if use_own_ua:
         # We keep a list user agents in order to build a list to use in the capture
         # interface: this is the easiest way to have something up to date.
@@ -241,9 +244,9 @@ def after_request(response):
     return response


-def file_response(func):
+def file_response(func):  # type: ignore[no-untyped-def]
     @functools.wraps(func)
-    def wrapper(*args, **kwargs):
+    def wrapper(*args, **kwargs) -> Response:  # type: ignore[no-untyped-def]
         try:
             return func(*args, **kwargs)
         except NoValidHarFile:
@@ -259,23 +262,23 @@ def file_response(func):
 # ##### Hostnode level methods #####

 @app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/hashes', methods=['GET'])
-@file_response
-def hashes_hostnode(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def hashes_hostnode(tree_uuid: str, node_uuid: str) -> Response:
     hashes = lookyloo.get_hashes(tree_uuid, hostnode_uuid=node_uuid)
     return send_file(BytesIO('\n'.join(hashes).encode()),
                      mimetype='test/plain', as_attachment=True, download_name=f'hashes.{node_uuid}.txt')


 @app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/text', methods=['GET'])
-@file_response
-def urls_hostnode(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urls_hostnode(tree_uuid: str, node_uuid: str) -> Response:
     hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
     return send_file(BytesIO('\n'.join(url.name for url in hostnode.urls).encode()),
                      mimetype='test/plain', as_attachment=True, download_name=f'urls.{node_uuid}.txt')


 @app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>', methods=['GET'])
-def hostnode_popup(tree_uuid: str, node_uuid: str):
+def hostnode_popup(tree_uuid: str, node_uuid: str) -> str | WerkzeugResponse | Response:
     try:
         hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)
     except IndexError:
@@ -294,7 +297,7 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
 # ##### Tree level Methods #####

 @app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
-def trigger_modules(tree_uuid: str):
+def trigger_modules(tree_uuid: str) -> WerkzeugResponse | str | Response:
     force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
     auto_trigger = True if (request.args.get('auto_trigger') and request.args.get('auto_trigger') == 'True') else False
     lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
@@ -302,7 +305,7 @@ def trigger_modules(tree_uuid: str):


 @app.route('/tree/<string:tree_uuid>/historical_lookups', methods=['GET'])
-def historical_lookups(tree_uuid: str):
+def historical_lookups(tree_uuid: str) -> str | WerkzeugResponse | Response:
     force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
     data = lookyloo.get_historical_lookups(tree_uuid, force)
     return render_template('historical_lookups.html', tree_uuid=tree_uuid,
@@ -312,7 +315,7 @@ def historical_lookups(tree_uuid: str):

 @app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''})
 @app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
-def categories_capture(tree_uuid: str, query: str):
+def categories_capture(tree_uuid: str, query: str) -> str | WerkzeugResponse | Response:
     if not enable_categorization:
         return redirect(url_for('tree', tree_uuid=tree_uuid))
     current_categories = lookyloo.categories_capture(tree_uuid)
@@ -330,7 +333,7 @@ def categories_capture(tree_uuid: str, query: str):

 @app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
 @app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
-def uncategorize_capture(tree_uuid: str, category: str):
+def uncategorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
     if not enable_categorization:
         return jsonify({'response': 'Categorization not enabled.'})
     lookyloo.uncategorize_capture(tree_uuid, category)
@@ -339,7 +342,7 @@ def uncategorize_capture(tree_uuid: str, category: str):

 @app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
 @app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
-def categorize_capture(tree_uuid: str, category: str):
+def categorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
     if not enable_categorization:
         return jsonify({'response': 'Categorization not enabled.'})
     lookyloo.categorize_capture(tree_uuid, category)
@@ -347,19 +350,19 @@ def categorize_capture(tree_uuid: str, category: str):


 @app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
-def stats(tree_uuid: str):
+def stats(tree_uuid: str) -> str:
     stats = lookyloo.get_statistics(tree_uuid)
     return render_template('statistics.html', uuid=tree_uuid, stats=stats)


 @app.route('/tree/<string:tree_uuid>/misp_lookup', methods=['GET'])
-@flask_login.login_required
-def web_misp_lookup_view(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def web_misp_lookup_view(tree_uuid: str) -> str | WerkzeugResponse | Response:
     if not lookyloo.misps.available:
         flash('There are no MISP instances available.', 'error')
         return redirect(url_for('tree', tree_uuid=tree_uuid))
     misps_occurrences = {}
-    for instance_name in lookyloo.misps:
+    for instance_name in lookyloo.misps.keys():
         if occurrences := lookyloo.get_misp_occurrences(tree_uuid, instance_name=instance_name):
             misps_occurrences[instance_name] = occurrences
     return render_template('misp_lookup.html', uuid=tree_uuid,
@@ -368,8 +371,8 @@ def web_misp_lookup_view(tree_uuid: str):


 @app.route('/tree/<string:tree_uuid>/misp_push', methods=['GET', 'POST'])
-@flask_login.login_required
-def web_misp_push_view(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def web_misp_push_view(tree_uuid: str) -> str | WerkzeugResponse | Response | None:
     if not lookyloo.misps.available:
         flash('There are no MISP instances available.', 'error')
         return redirect(url_for('tree', tree_uuid=tree_uuid))
@@ -413,7 +416,7 @@ def web_misp_push_view(tree_uuid: str):
     # Submit the event
     tags = request.form.getlist('tags')
     error = False
-    events: List[MISPEvent] = []
+    events: list[MISPEvent] = []
     with_parents = request.form.get('with_parents')
     if with_parents:
         exports = lookyloo.misp_export(tree_uuid, True)
@@ -447,15 +450,16 @@ def web_misp_push_view(tree_uuid: str):
         for e in new_events:
             flash(f'MISP event {e.id} created on {misp.client.root_url}', 'success')
     return redirect(url_for('tree', tree_uuid=tree_uuid))
+    return None


 @app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
-def modules(tree_uuid: str):
+def modules(tree_uuid: str) -> str | WerkzeugResponse | Response:
     modules_responses = lookyloo.get_modules_responses(tree_uuid)
     if not modules_responses:
         return redirect(url_for('tree', tree_uuid=tree_uuid))

-    vt_short_result: Dict[str, Dict[str, Any]] = {}
+    vt_short_result: dict[str, dict[str, Any]] = {}
     if 'vt' in modules_responses:
         # VirusTotal cleanup
         vt = modules_responses.pop('vt')
@@ -471,7 +475,7 @@ def modules(tree_uuid: str):
             if result['category'] == 'malicious':
                 vt_short_result[url]['malicious'].append((vendor, result['result']))

-    pi_short_result: Dict[str, str] = {}
+    pi_short_result: dict[str, str] = {}
     if 'pi' in modules_responses:
         pi = modules_responses.pop('pi')
         for url, full_report in pi.items():
@@ -479,7 +483,7 @@ def modules(tree_uuid: str):
                 continue
             pi_short_result[url] = full_report['results'][0]['tag_label']

-    phishtank_short_result: Dict[str, Dict] = {'urls': {}, 'ips_hits': {}}
+    phishtank_short_result: dict[str, dict[str, Any]] = {'urls': {}, 'ips_hits': {}}
     if 'phishtank' in modules_responses:
         pt = modules_responses.pop('phishtank')
         for url, full_report in pt['urls'].items():
@@ -496,7 +500,7 @@ def modules(tree_uuid: str):
                 full_report['url'],
                 full_report['phish_detail_url']))

-    urlhaus_short_result: Dict[str, List] = {'urls': []}
+    urlhaus_short_result: dict[str, list[Any]] = {'urls': []}
     if 'urlhaus' in modules_responses:
         # TODO: make a short result
         uh = modules_responses.pop('urlhaus')
@@ -504,7 +508,7 @@ def modules(tree_uuid: str):
             if results:
                 urlhaus_short_result['urls'].append(results)

-    urlscan_to_display: Dict = {}
+    urlscan_to_display: dict[str, Any] = {}
     if 'urlscan' in modules_responses and modules_responses.get('urlscan'):
         urlscan = modules_responses.pop('urlscan')
         if 'error' in urlscan['submission']:
@@ -534,8 +538,8 @@ def modules(tree_uuid: str):


 @app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
-@file_response
-def redirects(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def redirects(tree_uuid: str) -> Response:
     cache = lookyloo.capture_cache(tree_uuid)
     if not cache or not hasattr(cache, 'redirects'):
         return Response('Not available.', mimetype='text/text')
@@ -550,8 +554,8 @@ def redirects(tree_uuid: str):


 @app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
-@file_response
-def image(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def image(tree_uuid: str) -> Response:
     max_width = request.args.get('width')
     if max_width and max_width.isdigit():
         to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, width=int(max_width))
@@ -562,12 +566,11 @@ def image(tree_uuid: str):


 @app.route('/tree/<string:tree_uuid>/data', methods=['GET'])
-@file_response
-def data(tree_uuid: str):
+@file_response  # type: ignore[misc]
+def data(tree_uuid: str) -> Response:
     filename, data = lookyloo.get_data(tree_uuid)
     if len(filename) == 0:
-        # TODO: return something saying it is not a valid request
-        return
+        return Response('No files.', mimetype='text/text')

     if filetype.guess_mime(data.getvalue()) is None:
         mime = 'application/octet-stream'
@ -579,46 +582,46 @@ def data(tree_uuid: str):
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/thumbnail/', defaults={'width': 64}, methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/thumbnail/', defaults={'width': 64}, methods=['GET'])
|
||||||
@app.route('/tree/<string:tree_uuid>/thumbnail/<int:width>', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/thumbnail/<int:width>', methods=['GET'])
|
||||||
@file_response
|
@file_response # type: ignore[misc]
|
||||||
def thumbnail(tree_uuid: str, width: int):
|
def thumbnail(tree_uuid: str, width: int) -> Response:
|
||||||
to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=False, width=width)
|
to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=False, width=width)
|
||||||
return send_file(to_return, mimetype='image/png')
|
return send_file(to_return, mimetype='image/png')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
|
||||||
@file_response
|
@file_response # type: ignore[misc]
|
||||||
def html(tree_uuid: str):
|
def html(tree_uuid: str) -> Response:
|
||||||
to_return = lookyloo.get_html(tree_uuid)
|
to_return = lookyloo.get_html(tree_uuid)
|
||||||
return send_file(to_return, mimetype='text/html',
|
return send_file(to_return, mimetype='text/html',
|
||||||
as_attachment=True, download_name='page.html')
|
as_attachment=True, download_name='page.html')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
|
||||||
@file_response
|
@file_response # type: ignore[misc]
|
||||||
def cookies(tree_uuid: str):
|
def cookies(tree_uuid: str) -> Response:
|
||||||
to_return = lookyloo.get_cookies(tree_uuid)
|
to_return = lookyloo.get_cookies(tree_uuid)
|
||||||
return send_file(to_return, mimetype='application/json',
|
return send_file(to_return, mimetype='application/json',
|
||||||
as_attachment=True, download_name='cookies.json')
|
as_attachment=True, download_name='cookies.json')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/hashes', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/hashes', methods=['GET'])
|
||||||
@file_response
|
@file_response # type: ignore[misc]
|
||||||
def hashes_tree(tree_uuid: str):
|
def hashes_tree(tree_uuid: str) -> Response:
|
||||||
hashes = lookyloo.get_hashes(tree_uuid)
|
hashes = lookyloo.get_hashes(tree_uuid)
|
||||||
return send_file(BytesIO('\n'.join(hashes).encode()),
|
return send_file(BytesIO('\n'.join(hashes).encode()),
|
||||||
mimetype='test/plain', as_attachment=True, download_name='hashes.txt')
|
mimetype='test/plain', as_attachment=True, download_name='hashes.txt')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
|
||||||
@file_response
|
@file_response # type: ignore[misc]
|
||||||
def export(tree_uuid: str):
|
def export(tree_uuid: str) -> Response:
|
||||||
to_return = lookyloo.get_capture(tree_uuid)
|
to_return = lookyloo.get_capture(tree_uuid)
|
||||||
return send_file(to_return, mimetype='application/zip',
|
return send_file(to_return, mimetype='application/zip',
|
||||||
as_attachment=True, download_name='capture.zip')
|
as_attachment=True, download_name='capture.zip')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
|
||||||
def urls_rendered_page(tree_uuid: str):
|
def urls_rendered_page(tree_uuid: str) -> WerkzeugResponse | str | Response:
|
||||||
try:
|
try:
|
||||||
urls = lookyloo.get_urls_rendered_page(tree_uuid)
|
urls = lookyloo.get_urls_rendered_page(tree_uuid)
|
||||||
return render_template('urls_rendered.html', base_tree_uuid=tree_uuid, urls=urls)
|
return render_template('urls_rendered.html', base_tree_uuid=tree_uuid, urls=urls)
|
||||||
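All of these download endpoints share one pattern: build the payload in memory, wrap it in BytesIO where needed, and let `send_file` set the attachment headers via `as_attachment` and `download_name`. (`mimetype='test/plain'` in `hashes_tree` reads like a typo for `text/plain`; it is kept verbatim above because that is what the source contains.) A self-contained sketch of the pattern:

    from io import BytesIO

    from flask import Flask, Response, send_file

    app = Flask(__name__)


    @app.route('/hashes-demo')
    def hashes_demo() -> Response:
        hashes = ['deadbeef', 'cafebabe']  # placeholder values
        # send_file accepts any file-like object; BytesIO avoids touching disk,
        # and download_name (Flask >= 2.0) names the attachment for the browser.
        return send_file(BytesIO('\n'.join(hashes).encode()),
                         mimetype='text/plain', as_attachment=True,
                         download_name='hashes.txt')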
@@ -628,7 +631,7 @@ def urls_rendered_page(tree_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/hashlookup', methods=['GET'])
-def hashlookup(tree_uuid: str):
+def hashlookup(tree_uuid: str) -> str | WerkzeugResponse | Response:
     merged, total_ressources = lookyloo.merge_hashlookup_tree(tree_uuid)
     # We only want unique URLs for the template
     for sha1, entries in merged.items():
@@ -637,7 +640,7 @@ def hashlookup(tree_uuid: str):
 
 
 @app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
-def bulk_captures(base_tree_uuid: str):
+def bulk_captures(base_tree_uuid: str) -> WerkzeugResponse | str | Response:
     if flask_login.current_user.is_authenticated:
         user = flask_login.current_user.get_id()
     else:
@@ -666,16 +669,16 @@ def bulk_captures(base_tree_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
-@flask_login.login_required
-def hide_capture(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def hide_capture(tree_uuid: str) -> WerkzeugResponse:
     lookyloo.hide_capture(tree_uuid)
     flash('Successfully hidden.', 'success')
     return redirect(url_for('tree', tree_uuid=tree_uuid))
 
 
 @app.route('/tree/<string:tree_uuid>/rebuild')
-@flask_login.login_required
-def rebuild_tree(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def rebuild_tree(tree_uuid: str) -> WerkzeugResponse:
     try:
         lookyloo.remove_pickle(tree_uuid)
         flash('Successfully rebuilt.', 'success')
@@ -685,13 +688,13 @@ def rebuild_tree(tree_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/cache', methods=['GET'])
-def cache_tree(tree_uuid: str):
+def cache_tree(tree_uuid: str) -> WerkzeugResponse:
     lookyloo.capture_cache(tree_uuid)
     return redirect(url_for('index'))
 
 
 @app.route('/tree/<string:tree_uuid>/monitor', methods=['POST', 'GET'])
-def monitor(tree_uuid: str):
+def monitor(tree_uuid: str) -> WerkzeugResponse:
     if not lookyloo.monitoring_enabled:
         return redirect(url_for('tree', tree_uuid=tree_uuid))
     if request.form.get('name') or not request.form.get('confirm'):
@@ -702,7 +705,7 @@ def monitor(tree_uuid: str):
         collection: str = request.form['collection'] if request.form.get('collection') else ''
         notification_email: str = request.form['notification'] if request.form.get('notification') else ''
         frequency: str = request.form['frequency'] if request.form.get('frequency') else 'daily'
-        expire_at: Optional[float] = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
+        expire_at: float | None = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
         cache = lookyloo.capture_cache(tree_uuid)
         if cache:
             monitoring_uuid = lookyloo.monitoring.monitor({'url': cache.url, 'user_agent': cache.user_agent, 'listing': False},
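`expire_at` shows the intended form handling: the UI submits an ISO 8601 date, `datetime.fromisoformat()` parses it, and `.timestamp()` reduces it to the plain float the monitoring backend stores. A small sketch (the input value is a placeholder):

    from __future__ import annotations

    from datetime import datetime

    raw = '2024-01-31T12:00:00'  # e.g. from an HTML date/time picker
    expire_at: float | None = None
    if raw:
        # fromisoformat() accepts 'YYYY-MM-DD[THH:MM:SS]'; timestamp() treats a
        # naive datetime as local time, worth remembering for expiry handling.
        expire_at = datetime.fromisoformat(raw).timestamp()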
@@ -719,7 +722,7 @@ def monitor(tree_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/send_mail', methods=['POST', 'GET'])
-def send_mail(tree_uuid: str):
+def send_mail(tree_uuid: str) -> WerkzeugResponse:
     if not enable_mail_notification:
         return redirect(url_for('tree', tree_uuid=tree_uuid))
     if request.form.get('name') or not request.form.get('confirm'):
@@ -739,7 +742,7 @@ def send_mail(tree_uuid: str):
 
 @app.route('/tree/<string:tree_uuid>', methods=['GET'])
 @app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
-def tree(tree_uuid: str, node_uuid: Optional[str]=None):
+def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | WerkzeugResponse:
     if tree_uuid == 'False':
         flash("Unable to process your request.", 'warning')
         return redirect(url_for('index'))
@@ -820,10 +823,10 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
 
 
 @app.route('/tree/<string:tree_uuid>/mark_as_legitimate', methods=['POST'])
-@flask_login.login_required
-def mark_as_legitimate(tree_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def mark_as_legitimate(tree_uuid: str) -> Response:
     if request.data:
-        legitimate_entries: Dict = request.get_json(force=True)
+        legitimate_entries: dict[str, Any] = request.get_json(force=True)
         lookyloo.add_to_legitimate(tree_uuid, **legitimate_entries)
     else:
         lookyloo.add_to_legitimate(tree_uuid)
@@ -831,13 +834,13 @@ def mark_as_legitimate(tree_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/body_hashes', methods=['GET'])
-def tree_body_hashes(tree_uuid: str):
+def tree_body_hashes(tree_uuid: str) -> str:
     body_hashes = lookyloo.get_all_body_hashes(tree_uuid)
     return render_template('tree_body_hashes.html', tree_uuid=tree_uuid, body_hashes=body_hashes)
 
 
 @app.route('/tree/<string:tree_uuid>/pandora', methods=['GET', 'POST'])
-def pandora_submit(tree_uuid: str):
+def pandora_submit(tree_uuid: str) -> dict[str, Any] | Response:
     node_uuid = None
     if request.method == 'POST':
         input_json = request.get_json(force=True)
@@ -860,14 +863,14 @@ def pandora_submit(tree_uuid: str):
 
 # ##### helpers #####
 
-def index_generic(show_hidden: bool=False, show_error: bool=True, category: Optional[str]=None):
+def index_generic(show_hidden: bool=False, show_error: bool=True, category: str | None=None) -> str:
     """This method is used to generate the index page. It is possible that some of the captures
     do not have their pickle yet.
 
     We must assume that calling cached.tree will fail, and handle it gracefully.
     """
     titles = []
-    cut_time: Optional[datetime] = None
+    cut_time: datetime | None = None
     if time_delta_on_index:
         # We want to filter the captures on the index
         cut_time = (datetime.now() - timedelta(**time_delta_on_index))
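`cut_time` is built by unpacking a configuration mapping straight into `timedelta`, so the index window can be expressed in configuration as any combination of `timedelta` keyword arguments, with no code change. For example (the config value is hypothetical):

    from datetime import datetime, timedelta

    # Hypothetical config entry; the real keys come from the instance configuration.
    time_delta_on_index = {'weeks': 1, 'days': 2}

    # timedelta(**mapping) accepts days, seconds, minutes, hours, weeks, ...
    cut_time = datetime.now() - timedelta(**time_delta_on_index)
    print(cut_time.isoformat())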
@@ -899,7 +902,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
                            version=pkg_version)
 
 
-def get_index_params(request):
+def get_index_params(request: Request) -> tuple[bool, str]:
     show_error: bool = True
     category: str = ''
     if hide_captures_with_error:
@@ -913,7 +916,7 @@ def get_index_params(request):
 # ##### Index level methods #####
 
 @app.route('/', methods=['GET'])
-def index():
+def index() -> str:
     if request.method == 'HEAD':
         # Just returns ack if the webserver is running
         return 'Ack'
@@ -922,28 +925,28 @@ def index():
 
 
 @app.route('/hidden', methods=['GET'])
-@flask_login.login_required
-def index_hidden():
+@flask_login.login_required  # type: ignore[misc]
+def index_hidden() -> str:
     show_error, category = get_index_params(request)
     return index_generic(show_hidden=True, show_error=show_error, category=category)
 
 
 @app.route('/cookies', methods=['GET'])
-def cookies_lookup():
+def cookies_lookup() -> str:
     cookies_names = [(name, freq, lookyloo.indexing.cookies_names_number_domains(name))
                      for name, freq in lookyloo.indexing.cookies_names]
     return render_template('cookies.html', cookies_names=cookies_names)
 
 
 @app.route('/hhhashes', methods=['GET'])
-def hhhashes_lookup():
+def hhhashes_lookup() -> str:
     hhhashes = [(hhh, freq, lookyloo.indexing.http_headers_hashes_number_captures(hhh))
                 for hhh, freq in lookyloo.indexing.http_headers_hashes]
     return render_template('hhhashes.html', hhhashes=hhhashes)
 
 
 @app.route('/ressources', methods=['GET'])
-def ressources():
+def ressources() -> str:
     ressources = []
     for h, freq in lookyloo.indexing.ressources:
         domain_freq = lookyloo.indexing.ressources_number_domains(h)
@@ -961,26 +964,26 @@ def ressources():
 
 
 @app.route('/categories', methods=['GET'])
-def categories():
+def categories() -> str:
     return render_template('categories.html', categories=lookyloo.indexing.categories)
 
 
 @app.route('/rebuild_all')
-@flask_login.login_required
-def rebuild_all():
+@flask_login.login_required  # type: ignore[misc]
+def rebuild_all() -> WerkzeugResponse:
     lookyloo.rebuild_all()
     return redirect(url_for('index'))
 
 
 @app.route('/rebuild_cache')
-@flask_login.login_required
-def rebuild_cache():
+@flask_login.login_required  # type: ignore[misc]
+def rebuild_cache() -> WerkzeugResponse:
     lookyloo.rebuild_cache()
     return redirect(url_for('index'))
 
 
 @app.route('/search', methods=['GET', 'POST'])
-def search():
+def search() -> str | Response | WerkzeugResponse:
     if request.form.get('url'):
         quoted_url: str = quote_plus(request.form['url'])
         return redirect(url_for('url_details', url=quoted_url))
@@ -993,7 +996,7 @@ def search():
     return render_template('search.html')
 
 
-def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None):
+def _prepare_capture_template(user_ua: str | None, predefined_url: str | None=None) -> str:
     return render_template('capture.html', user_agents=user_agents.user_agents,
                            default=user_agents.default,
                            personal_ua=user_ua,
@@ -1004,7 +1007,7 @@ def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[s
 
 
 @app.route('/recapture/<string:tree_uuid>', methods=['GET'])
-def recapture(tree_uuid: str):
+def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse:
     cache = lookyloo.capture_cache(tree_uuid)
     if cache and hasattr(cache, 'url'):
         return _prepare_capture_template(user_ua=request.headers.get('User-Agent'),
@@ -1016,15 +1019,15 @@ def recapture(tree_uuid: str):
 # ################## Submit existing capture ##################
 
 @app.route('/submit_capture', methods=['GET', 'POST'])
-def submit_capture():
+def submit_capture() -> str | Response | WerkzeugResponse:
 
     if request.method == 'POST':
         listing = True if request.form.get('listing') else False
         uuid = str(uuid4())  # NOTE: new UUID, because we do not want duplicates
-        har: Optional[Dict[str, Any]] = None
-        html: Optional[str] = None
-        last_redirected_url: Optional[str] = None
-        screenshot: Optional[bytes] = None
+        har: dict[str, Any] | None = None
+        html: str | None = None
+        last_redirected_url: str | None = None
+        screenshot: bytes | None = None
         if 'har_file' in request.files and request.files['har_file']:
            har = json.loads(request.files['har_file'].stream.read())
            last_redirected_url = request.form.get('landing_page')
@@ -1038,7 +1041,7 @@ def submit_capture():
             return redirect(url_for('tree', tree_uuid=uuid))
         elif 'full_capture' in request.files and request.files['full_capture']:
             # it *only* accepts a lookyloo export.
-            cookies: Optional[List[Dict[str, str]]] = None
+            cookies: list[dict[str, str]] | None = None
             has_error = False
             with ZipFile(BytesIO(request.files['full_capture'].stream.read()), 'r') as lookyloo_capture:
                 potential_favicons = set()
@@ -1084,7 +1087,7 @@ def submit_capture():
 # #############################################################
 
 @app.route('/capture', methods=['GET', 'POST'])
-def capture_web():
+def capture_web() -> str | Response | WerkzeugResponse:
     if flask_login.current_user.is_authenticated:
         user = flask_login.current_user.get_id()
     else:
@@ -1143,7 +1146,7 @@ def capture_web():
             parsed_proxy = urlparse(request.form['proxy'])
             if parsed_proxy.scheme and parsed_proxy.hostname and parsed_proxy.port:
                 if parsed_proxy.scheme in ['http', 'https', 'socks5']:
-                    if (parsed_proxy.username and parsed_proxy.password) != (not parsed_proxy.username and not parsed_proxy.password):
+                    if (parsed_proxy.username and parsed_proxy.password) or (not parsed_proxy.username and not parsed_proxy.password):
                        capture_query['proxy'] = request.form['proxy']
                     else:
                         flash('You need to enter a username AND a password for your proxy.', 'error')
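The proxy check rewritten above is the one behavioural fix in this hunk. The old expression compared a truthy string against a boolean with `!=`, which accepted half-filled credential pairs; the new `(username and password) or (not username and not password)` states the real rule: both credentials or neither. A quick truth-table check:

    from __future__ import annotations

    from itertools import product


    def credentials_ok(username: str | None, password: str | None) -> bool:
        # Accept: both set, or both missing. Reject: exactly one set.
        return bool((username and password) or (not username and not password))


    for u, p in product(['user', None], ['pass', None]):
        print(f'{u!r:8} {p!r:8} -> {credentials_ok(u, p)}')
    # 'user' 'pass' -> True; exactly one None -> False; both None -> True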
@@ -1192,47 +1195,47 @@ def capture_web():
 
 
 @app.route('/cookies/<string:cookie_name>', methods=['GET'])
-def cookies_name_detail(cookie_name: str):
+def cookies_name_detail(cookie_name: str) -> str:
     captures, domains = lookyloo.get_cookie_name_investigator(cookie_name.strip())
     return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures)
 
 
 @app.route('/hhhdetails/<string:hhh>', methods=['GET'])
-def hhh_detail(hhh: str):
+def hhh_detail(hhh: str) -> str:
     captures, headers = lookyloo.get_hhh_investigator(hhh.strip())
     return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers)
 
 
 @app.route('/body_hashes/<string:body_hash>', methods=['GET'])
-def body_hash_details(body_hash: str):
+def body_hash_details(body_hash: str) -> str:
     from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
     captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip())
     return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup)
 
 
 @app.route('/urls/<string:url>', methods=['GET'])
-def url_details(url: str):
+def url_details(url: str) -> str:
     url = unquote_plus(url).strip()
     hits = lookyloo.get_url_occurrences(url, limit=50)
     return render_template('url.html', url=url, hits=hits)
 
 
 @app.route('/hostnames/<string:hostname>', methods=['GET'])
-def hostname_details(hostname: str):
+def hostname_details(hostname: str) -> str:
     hits = lookyloo.get_hostname_occurrences(hostname.strip(), with_urls_occurrences=True, limit=50)
     return render_template('hostname.html', hostname=hostname, hits=hits)
 
 
 @app.route('/stats', methods=['GET'])
-def statsfull():
+def statsfull() -> str:
     stats = lookyloo.get_stats()
     return render_template('stats.html', stats=stats)
 
 
 @app.route('/whois/<string:query>', methods=['GET'])
 @app.route('/whois/<string:query>/<int:email_only>', methods=['GET'])
-@file_response
-def whois(query: str, email_only: int=0):
+@file_response  # type: ignore[misc]
+def whois(query: str, email_only: int=0) -> Response:
     to_return = lookyloo.uwhois.whois(query, bool(email_only))
     if isinstance(to_return, str):
         return send_file(BytesIO(to_return.encode()),
@@ -1243,35 +1246,35 @@ def whois(query: str, email_only: int=0):
 # ##### Methods related to a specific URLNode #####
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
-@file_response
-def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urlnode_request_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.request_cookie:
-        return
+        return None
 
     return send_file(BytesIO(json.dumps(urlnode.request_cookie, indent=2).encode()),
                      mimetype='text/plain', as_attachment=True, download_name='request_cookies.txt')
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
-@file_response
-def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urlnode_response_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.response_cookie:
-        return
+        return None
 
     return send_file(BytesIO(json.dumps(urlnode.response_cookie, indent=2).encode()),
                      mimetype='text/plain', as_attachment=True, download_name='response_cookies.txt')
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
-@file_response
-def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
     # Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at somepoint,
     # we have multiple page rendered on one tree, it will be a problem.
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not hasattr(urlnode, 'rendered_html') or not urlnode.rendered_html:
-        return
+        return None
 
     ct = lookyloo.get_crawled_tree(tree_uuid)
     not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
@@ -1283,22 +1286,22 @@ def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/rendered_content', methods=['GET'])
-@file_response
-def urlnode_rendered_content(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urlnode_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.rendered_html:
-        return
+        return None
     return send_file(BytesIO(urlnode.rendered_html.getvalue()), mimetype='text/plain',
                      as_attachment=True, download_name='rendered_content.txt')
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
-@file_response
-def urlnode_post_request(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def urlnode_post_request(tree_uuid: str, node_uuid: str) -> Response | None:
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.posted_data:
-        return
-    posted: Union[str, bytes]
+        return None
+    posted: str | bytes
     if isinstance(urlnode.posted_data, (dict, list)):
         # JSON blob, pretty print.
         posted = json.dumps(urlnode.posted_data, indent=2)
@@ -1322,8 +1325,8 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
-@file_response
-def get_ressource(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def get_ressource(tree_uuid: str, node_uuid: str) -> Response:
     if request.method == 'POST':
         h_request = request.form.get('ressource_hash')
     else:
@@ -1343,8 +1346,8 @@ def get_ressource(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview', methods=['GET'])
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview/<string:h_ressource>', methods=['GET'])
-@file_response
-def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: Optional[str]=None):
+@file_response  # type: ignore[misc]
+def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: str | None=None) -> Response:
     ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_ressource)
     if not ressource:
         return Response('No preview available.', mimetype='text/text')
@@ -1356,16 +1359,16 @@ def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: Optional[
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/hashes', methods=['GET'])
-@file_response
-def hashes_urlnode(tree_uuid: str, node_uuid: str):
+@file_response  # type: ignore[misc]
+def hashes_urlnode(tree_uuid: str, node_uuid: str) -> Response:
     hashes = lookyloo.get_hashes(tree_uuid, urlnode_uuid=node_uuid)
     return send_file(BytesIO('\n'.join(hashes).encode()),
                      mimetype='test/plain', as_attachment=True, download_name='hashes.txt')
 
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/add_context', methods=['POST'])
-@flask_login.login_required
-def add_context(tree_uuid: str, node_uuid: str):
+@flask_login.login_required  # type: ignore[misc]
+def add_context(tree_uuid: str, node_uuid: str) -> WerkzeugResponse | None:
     if not enable_context_by_users:
         return redirect(url_for('ressources'))
 
@@ -1375,7 +1378,7 @@ def add_context(tree_uuid: str, node_uuid: str):
         callback_str: str = context_data['callback_str']
         legitimate: bool = True if context_data.get('legitimate') else False
         malicious: bool = True if context_data.get('malicious') else False
-        details: Dict[str, Dict] = {'malicious': {}, 'legitimate': {}}
+        details: dict[str, dict[str, Any]] = {'malicious': {}, 'legitimate': {}}
         if malicious:
             malicious_details = {}
             if context_data.get('malicious_type'):
@@ -1396,6 +1399,7 @@ def add_context(tree_uuid: str, node_uuid: str):
             return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
         elif callback_str == 'ressources':
             return redirect(url_for('ressources'))
+    return None
 
 
 # Query API
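The trailing `return None` added to `add_context` makes the implicit fall-through explicit: once the view is annotated `-> WerkzeugResponse | None`, strict mypy wants every path to return visibly, and the explicit None marks 'no callback matched' as a deliberate outcome rather than an oversight. In miniature:

    from __future__ import annotations


    def pick_redirect(callback: str) -> str | None:
        if callback == 'hostnode_popup':
            return '/popup'
        elif callback == 'ressources':
            return '/ressources'
        return None  # explicit: the None path is visible to readers and type checkers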
@@ -1,20 +1,22 @@
 #!/usr/bin/env python3
 
+from __future__ import annotations
+
 import base64
 import hashlib
 import json
 
 from io import BytesIO
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Tuple, List
 from zipfile import ZipFile
 
 import flask_login  # type: ignore
-from flask import request, send_file
+from flask import request, send_file, Response
 from flask_restx import Namespace, Resource, abort, fields  # type: ignore
 from werkzeug.security import check_password_hash
 
 from lacuscore import CaptureStatus as CaptureStatusCore
-from pylacus import CaptureStatus as CaptureStatusPy
+from pylacus import CaptureStatus as CaptureStatusPy  # type: ignore[attr-defined]
 from lookyloo.comparator import Comparator
 from lookyloo.exceptions import MissingUUID, NoValidHarFile
 from lookyloo.lookyloo import CaptureSettings, Lookyloo
@@ -27,7 +29,7 @@ lookyloo: Lookyloo = get_lookyloo_instance()
 comparator: Comparator = Comparator()
 
 
-def api_auth_check(method):
+def api_auth_check(method):  # type: ignore
     if flask_login.current_user.is_authenticated or load_user_from_request(request):
         return method
     abort(403, 'Authentication required.')
@@ -39,30 +41,30 @@ token_request_fields = api.model('AuthTokenFields', {
 })
 
 
-@api.errorhandler(NoValidHarFile)
-def handle_no_HAR_file_exception(error):
+@api.errorhandler(NoValidHarFile)  # type: ignore[misc]
+def handle_no_HAR_file_exception(error: Any) -> tuple[dict[str, str], int]:
     '''The capture has no HAR file, it failed for some reason.'''
     return {'message': str(error)}, 400
 
 
 @api.route('/json/get_token')
 @api.doc(description='Get the API token required for authenticated calls')
-class AuthToken(Resource):
+class AuthToken(Resource):  # type: ignore[misc]
 
     users_table = build_users_table()
 
-    @api.param('username', 'Your username')
-    @api.param('password', 'Your password')
-    def get(self):
-        username: Optional[str] = request.args['username'] if request.args.get('username') else None
-        password: Optional[str] = request.args['password'] if request.args.get('password') else None
+    @api.param('username', 'Your username')  # type: ignore[misc]
+    @api.param('password', 'Your password')  # type: ignore[misc]
+    def get(self) -> dict[str, str] | tuple[dict[str, str], int]:
+        username: str | None = request.args['username'] if request.args.get('username') else None
+        password: str | None = request.args['password'] if request.args.get('password') else None
         if username and password and username in self.users_table and check_password_hash(self.users_table[username]['password'], password):
             return {'authkey': self.users_table[username]['authkey']}
         return {'error': 'User/Password invalid.'}, 401
 
-    @api.doc(body=token_request_fields)
-    def post(self):
-        auth: Dict = request.get_json(force=True)
+    @api.doc(body=token_request_fields)  # type: ignore[misc]
+    def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
+        auth: dict[str, Any] = request.get_json(force=True)
         if 'username' in auth and 'password' in auth:  # Expected keys in json
             if (auth['username'] in self.users_table
                     and check_password_hash(self.users_table[auth['username']]['password'], auth['password'])):
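`AuthToken` accepts the same credentials two ways: query parameters on GET, or a JSON body on POST. Both return `{'authkey': ...}` on success and an error payload with a 401 otherwise. A client-side sketch using requests (the instance URL and credentials are placeholders):

    import requests

    BASE = 'https://lookyloo.example'  # placeholder instance

    # GET with query parameters:
    r = requests.get(f'{BASE}/json/get_token',
                     params={'username': 'analyst', 'password': 'secret'})

    # ...or POST with a JSON body, which keeps credentials out of access logs:
    r = requests.post(f'{BASE}/json/get_token',
                      json={'username': 'analyst', 'password': 'secret'})

    if r.ok and 'authkey' in r.json():
        token = r.json()['authkey']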
@@ -73,13 +75,13 @@ class AuthToken(Resource):
 @api.route('/json/<string:capture_uuid>/status')
 @api.doc(description='Get the status of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureStatusQuery(Resource):
+class CaptureStatusQuery(Resource):  # type: ignore[misc]
 
-    @api.param('with_error', 'Add the error message of the capture (if there is one)')
-    def get(self, capture_uuid: str):
+    @api.param('with_error', 'Add the error message of the capture (if there is one)')  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         with_error: bool = True if request.args.get('with_error') else False
         status_code = lookyloo.get_capture_status(capture_uuid)
-        to_return: Dict[str, Any] = {'status_code': status_code}
+        to_return: dict[str, Any] = {'status_code': status_code}
         if status_code in [CaptureStatusCore.DONE, CaptureStatusPy.DONE] and with_error:
             cache = lookyloo.capture_cache(capture_uuid)
             if cache and cache.error:
@@ -90,40 +92,40 @@ class CaptureStatusQuery(Resource):
 @api.route('/json/<string:capture_uuid>/hostnames')
 @api.doc(description='Get all the hostnames of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureHostnames(Resource):
-    def get(self, capture_uuid: str):
+class CaptureHostnames(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
-        to_return: Dict[str, Any] = {'response': {'hostnames': list(lookyloo.get_hostnames(capture_uuid))}}
+        to_return: dict[str, Any] = {'response': {'hostnames': list(lookyloo.get_hostnames(capture_uuid))}}
         return to_return
 
 
 @api.route('/json/<string:capture_uuid>/urls')
 @api.doc(description='Get all the URLs of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureURLs(Resource):
-    def get(self, capture_uuid: str):
+class CaptureURLs(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
-        to_return: Dict[str, Any] = {'response': {'urls': list(lookyloo.get_urls(capture_uuid))}}
+        to_return: dict[str, Any] = {'response': {'urls': list(lookyloo.get_urls(capture_uuid))}}
         return to_return
 
 
 @api.route('/json/<string:capture_uuid>/hashes')
 @api.doc(description='Get all the hashes of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureHashes(Resource):
+class CaptureHashes(Resource):  # type: ignore[misc]
     # Note: shake algos require a length for the digest, discarding them.
     supported_hash_algos = [algo for algo in hashlib.algorithms_available if not algo.startswith('shake')]
 
     # NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
     # so we return the SHA512 hashes by default
 
-    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
-    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')
-    def get(self, capture_uuid: str):
+    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')  # type: ignore[misc]
+    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
@@ -131,7 +133,7 @@ class CaptureHashes(Resource):
         algorithm = request.args['algorithm'].lower() if request.args.get('algorithm') else 'sha512'
         hashes_only = False if 'hashes_only' in request.args and request.args['hashes_only'] in [0, '0'] else True
         if algorithm == 'sha512' and hashes_only:
-            to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
+            to_return: dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
         else:
             hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
             to_return = {'response': {'hashes': list(hashes.keys())}}
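As the comments in `CaptureHashes` note, only the SHA512 digests are pre-computed in the tree; any other `algorithm` value is hashed on the fly, and the shake variants are excluded because they need an explicit digest length. Reproducing a digest locally to compare against what the endpoint returns, assuming you already hold the resource body:

    import hashlib

    body = b'...captured resource bytes...'  # placeholder content

    # The pre-computed default the endpoint returns:
    sha512 = hashlib.sha512(body).hexdigest()

    # Any other name from hashlib.algorithms_available is computed on the spot:
    sha256 = hashlib.new('sha256', body).hexdigest()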
|
@ -143,13 +145,13 @@ class CaptureHashes(Resource):
|
||||||
@api.route('/json/<string:capture_uuid>/redirects')
|
@api.route('/json/<string:capture_uuid>/redirects')
|
||||||
@api.doc(description='Get all the redirects of a capture',
|
@api.doc(description='Get all the redirects of a capture',
|
||||||
params={'capture_uuid': 'The UUID of the capture'})
|
params={'capture_uuid': 'The UUID of the capture'})
|
||||||
class CaptureRedirects(Resource):
|
class CaptureRedirects(Resource): # type: ignore[misc]
|
||||||
def get(self, capture_uuid: str):
|
def get(self, capture_uuid: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
|
||||||
cache = lookyloo.capture_cache(capture_uuid)
|
cache = lookyloo.capture_cache(capture_uuid)
|
||||||
if not cache:
|
if not cache:
|
||||||
return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
|
return {'error': 'UUID missing in cache, try again later and check the status first.'}, 400
|
||||||
|
|
||||||
to_return: Dict[str, Any] = {}
|
to_return: dict[str, Any] = {}
|
||||||
try:
|
try:
|
||||||
to_return = {'response': {'url': cache.url,
|
to_return = {'response': {'url': cache.url,
|
||||||
'redirects': cache.redirects if cache.redirects else []}}
|
'redirects': cache.redirects if cache.redirects else []}}
|
||||||
|
@ -166,8 +168,8 @@ class CaptureRedirects(Resource):
|
||||||
@api.route('/json/<string:capture_uuid>/misp_export')
|
@api.route('/json/<string:capture_uuid>/misp_export')
|
||||||
@api.doc(description='Get an export of the capture in MISP format',
|
@api.doc(description='Get an export of the capture in MISP format',
|
||||||
params={'capture_uuid': 'The UUID of the capture'})
|
params={'capture_uuid': 'The UUID of the capture'})
|
||||||
class MISPExport(Resource):
|
class MISPExport(Resource): # type: ignore[misc]
|
||||||
def get(self, capture_uuid: str):
|
def get(self, capture_uuid: str) -> dict[str, Any] | list[dict[str, Any]]:
|
||||||
with_parents = request.args.get('with_parents')
|
with_parents = request.args.get('with_parents')
|
||||||
event = lookyloo.misp_export(capture_uuid, True if with_parents else False)
|
event = lookyloo.misp_export(capture_uuid, True if with_parents else False)
|
||||||
if isinstance(event, dict):
|
if isinstance(event, dict):
|
||||||
|
@ -192,12 +194,12 @@ misp_push_fields = api.model('MISPPushFields', {
|
||||||
@api.doc(description='Push an event to a pre-configured MISP instance',
|
@api.doc(description='Push an event to a pre-configured MISP instance',
|
||||||
params={'capture_uuid': 'The UUID of the capture'},
|
params={'capture_uuid': 'The UUID of the capture'},
|
||||||
security='apikey')
|
security='apikey')
|
||||||
class MISPPush(Resource):
|
class MISPPush(Resource): # type: ignore[misc]
|
||||||
method_decorators = [api_auth_check]
|
method_decorators = [api_auth_check]
|
||||||
|
|
||||||
@api.param('with_parents', 'Also push the parents of the capture (if any)')
|
@api.param('with_parents', 'Also push the parents of the capture (if any)') # type: ignore[misc]
|
||||||
@api.param('allow_duplicates', 'Push the event even if it is already present on the MISP instance')
|
@api.param('allow_duplicates', 'Push the event even if it is already present on the MISP instance') # type: ignore[misc]
|
||||||
def get(self, capture_uuid: str, instance_name: Optional[str]=None):
|
def get(self, capture_uuid: str, instance_name: str | None=None) -> dict[str, Any] | list[dict[str, Any]]:
|
||||||
with_parents = True if request.args.get('with_parents') else False
|
with_parents = True if request.args.get('with_parents') else False
|
||||||
allow_duplicates = True if request.args.get('allow_duplicates') else False
|
allow_duplicates = True if request.args.get('allow_duplicates') else False
|
||||||
|
|
||||||
|
@ -208,7 +210,7 @@ class MISPPush(Resource):
|
||||||
else:
|
else:
|
||||||
return {'error': f'MISP instance "{instance_name}" does not exists.'}
|
return {'error': f'MISP instance "{instance_name}" does not exists.'}
|
||||||
|
|
||||||
to_return: Dict = {}
|
to_return: dict[str, Any] = {}
|
||||||
if not misp.available:
|
if not misp.available:
|
||||||
to_return['error'] = 'MISP module not available.'
|
to_return['error'] = 'MISP module not available.'
|
||||||
elif not misp.enable_push:
|
elif not misp.enable_push:
|
||||||
|
@ -229,9 +231,9 @@ class MISPPush(Resource):
|
||||||
|
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
@api.doc(body=misp_push_fields)
|
@api.doc(body=misp_push_fields) # type: ignore[misc]
|
||||||
def post(self, capture_uuid: str, instance_name: Optional[str]=None):
|
def post(self, capture_uuid: str, instance_name: str | None=None) -> dict[str, Any] | list[dict[str, Any]]:
|
||||||
parameters: Dict = request.get_json(force=True)
|
parameters: dict[str, Any] = request.get_json(force=True)
|
||||||
with_parents = True if parameters.get('with_parents') else False
|
with_parents = True if parameters.get('with_parents') else False
|
||||||
allow_duplicates = True if parameters.get('allow_duplicates') else False
|
allow_duplicates = True if parameters.get('allow_duplicates') else False
|
||||||
if instance_name is None:
|
if instance_name is None:
|
||||||
|
@ -241,7 +243,7 @@ class MISPPush(Resource):
|
||||||
else:
|
else:
|
||||||
return {'error': f'MISP instance "{instance_name}" does not exists.'}
|
return {'error': f'MISP instance "{instance_name}" does not exists.'}
|
||||||
|
|
||||||
to_return: Dict = {}
|
to_return: dict[str, Any] = {}
|
||||||
if not misp.available:
|
if not misp.available:
|
||||||
to_return['error'] = 'MISP module not available.'
|
to_return['error'] = 'MISP module not available.'
|
||||||
elif not misp.enable_push:
|
elif not misp.enable_push:
|
||||||
|
@ -272,10 +274,10 @@ trigger_modules_fields = api.model('TriggerModulesFields', {
|
||||||
@api.route('/json/<string:capture_uuid>/trigger_modules')
|
@api.route('/json/<string:capture_uuid>/trigger_modules')
|
||||||
@api.doc(description='Trigger all the available 3rd party modules on the given capture',
|
@api.doc(description='Trigger all the available 3rd party modules on the given capture',
|
||||||
params={'capture_uuid': 'The UUID of the capture'})
|
params={'capture_uuid': 'The UUID of the capture'})
|
||||||
class TriggerModules(Resource):
|
class TriggerModules(Resource): # type: ignore[misc]
|
||||||
@api.doc(body=trigger_modules_fields)
|
@api.doc(body=trigger_modules_fields) # type: ignore[misc]
|
||||||
def post(self, capture_uuid: str):
|
def post(self, capture_uuid: str) -> dict[str, Any]:
|
||||||
parameters: Dict = request.get_json(force=True)
|
parameters: dict[str, Any] = request.get_json(force=True)
|
||||||
force = True if parameters.get('force') else False
|
force = True if parameters.get('force') else False
|
||||||
return lookyloo.trigger_modules(capture_uuid, force=force)
|
return lookyloo.trigger_modules(capture_uuid, force=force)
|
||||||
|
|
||||||
|
@@ -283,12 +285,12 @@ class TriggerModules(Resource):
 @api.route('/json/hash_info/<h>')
 @api.doc(description='Search for a resource with a specific hash (sha512)',
          params={'h': 'The hash (sha512)'})
-class HashInfo(Resource):
-    def get(self, h: str):
+class HashInfo(Resource):  # type: ignore[misc]
+    def get(self, h: str) -> dict[str, Any] | tuple[dict[str, Any], int]:
         details, body = lookyloo.get_body_hash_full(h)
         if not details:
             return {'error': 'Unknown Hash.'}, 400
-        to_return: Dict[str, Any] = {'response': {'hash': h, 'details': details,
+        to_return: dict[str, Any] = {'response': {'hash': h, 'details': details,
                                                   'body': base64.b64encode(body.getvalue()).decode()}}
         return to_return

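`HashInfo.get` above ships the matching body back inside JSON, so the raw bytes are base64-encoded into an ASCII string first. A small sketch of that round trip, with a stand-in for what `lookyloo.get_body_hash_full` returns:

import base64
from io import BytesIO

body = BytesIO(b'<html>...</html>')  # stand-in for the stored resource

# bytes cannot be embedded in JSON, so encode to an ASCII string...
encoded = base64.b64encode(body.getvalue()).decode()

# ...and the API client reverses it to recover the original bytes.
assert base64.b64decode(encoded) == b'<html>...</html>'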
@@ -302,11 +304,11 @@ url_info_fields = api.model('URLInfoFields', {

 @api.route('/json/url_info')
 @api.doc(description='Search for a URL')
-class URLInfo(Resource):
+class URLInfo(Resource):  # type: ignore[misc]

-    @api.doc(body=url_info_fields)
-    def post(self):
-        to_query: Dict = request.get_json(force=True)
+    @api.doc(body=url_info_fields)  # type: ignore[misc]
+    def post(self) -> list[dict[str, Any]]:
+        to_query: dict[str, Any] = request.get_json(force=True)
         occurrences = lookyloo.get_url_occurrences(to_query.pop('url'), **to_query)
         return occurrences

@@ -320,51 +322,50 @@ hostname_info_fields = api.model('HostnameInfoFields', {

 @api.route('/json/hostname_info')
 @api.doc(description='Search for a hostname')
-class HostnameInfo(Resource):
+class HostnameInfo(Resource):  # type: ignore[misc]

-    @api.doc(body=hostname_info_fields)
-    def post(self):
-        to_query: Dict = request.get_json(force=True)
-        occurrences = lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query)
-        return occurrences
+    @api.doc(body=hostname_info_fields)  # type: ignore[misc]
+    def post(self) -> list[dict[str, Any]]:
+        to_query: dict[str, Any] = request.get_json(force=True)
+        return lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query)


 @api.route('/json/stats')
 @api.doc(description='Get the statistics of the lookyloo instance.')
-class InstanceStats(Resource):
-    def get(self):
+class InstanceStats(Resource):  # type: ignore[misc]
+    def get(self) -> dict[str, Any]:
         return lookyloo.get_stats()


 @api.route('/json/devices')
 @api.doc(description='Get the list of devices pre-configured on the platform')
-class Devices(Resource):
+class Devices(Resource):  # type: ignore[misc]

-    def get(self):
+    def get(self) -> dict[str, Any]:
         return lookyloo.get_playwright_devices()


 @api.route('/json/<string:capture_uuid>/stats')
 @api.doc(description='Get the statistics of the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureStats(Resource):
-    def get(self, capture_uuid: str):
+class CaptureStats(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return lookyloo.get_statistics(capture_uuid)


 @api.route('/json/<string:capture_uuid>/info')
 @api.doc(description='Get basic information about the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureInfo(Resource):
-    def get(self, capture_uuid: str):
+class CaptureInfo(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return lookyloo.get_info(capture_uuid)


 @api.route('/json/<string:capture_uuid>/cookies')
 @api.doc(description='Get the complete cookie jar created during the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureCookies(Resource):
-    def get(self, capture_uuid: str):
+class CaptureCookies(Resource):  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return json.loads(lookyloo.get_cookies(capture_uuid).read())

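Every `Resource` subclass and flask-restx decorator in this file gains `# type: ignore[misc]` because flask-restx ships without type hints: under strict mypy settings, subclassing an untyped class and applying untyped decorators both surface as `[misc]` errors. A minimal sketch of the pattern, assuming flask-restx is installed:

from __future__ import annotations

from typing import Any

from flask import Flask
from flask_restx import Api, Resource  # type: ignore[import-untyped]

app = Flask(__name__)
api = Api(app)


@api.route('/json/ping')
class Ping(Resource):  # type: ignore[misc]  # Resource itself is untyped

    def get(self) -> dict[str, Any]:
        # the return annotation is still checked on our side of the boundary
        return {'pong': True}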
@@ -392,17 +393,17 @@ submit_fields_post = api.model('SubmitFieldsPost', {


 @api.route('/submit')
-class SubmitCapture(Resource):
+class SubmitCapture(Resource):  # type: ignore[misc]

-    @api.param('url', 'The URL to capture', required=True)
-    @api.param('listing', 'Display the capture on the index', default=1)
-    @api.param('user_agent', 'User agent to use for the capture')
-    @api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.')
-    @api.param('device_name', 'Use the pre-configured settings for this device')
-    @api.param('referer', 'Referer to pass to the capture')
-    @api.param('proxy', 'Proxy to use for the capture')
-    @api.produces(['text/text'])
-    def get(self):
+    @api.param('url', 'The URL to capture', required=True)  # type: ignore[misc]
+    @api.param('listing', 'Display the capture on the index', default=1)  # type: ignore[misc]
+    @api.param('user_agent', 'User agent to use for the capture')  # type: ignore[misc]
+    @api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.')  # type: ignore[misc]
+    @api.param('device_name', 'Use the pre-configured settings for this device')  # type: ignore[misc]
+    @api.param('referer', 'Referer to pass to the capture')  # type: ignore[misc]
+    @api.param('proxy', 'Proxy to use for the capture')  # type: ignore[misc]
+    @api.produces(['text/text'])  # type: ignore[misc]
+    def get(self) -> str | tuple[str, int]:
         if flask_login.current_user.is_authenticated:
             user = flask_login.current_user.get_id()
         else:
@@ -430,9 +431,9 @@ class SubmitCapture(Resource):
         perma_uuid = lookyloo.enqueue_capture(to_query, source='api', user=user, authenticated=flask_login.current_user.is_authenticated)
         return perma_uuid

-    @api.doc(body=submit_fields_post)
-    @api.produces(['text/text'])
-    def post(self):
+    @api.doc(body=submit_fields_post)  # type: ignore[misc]
+    @api.produces(['text/text'])  # type: ignore[misc]
+    def post(self) -> str:
         if flask_login.current_user.is_authenticated:
             user = flask_login.current_user.get_id()
         else:
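For reference, the two entry points above differ only in how the submission travels; a hypothetical client (instance URL and values invented for illustration) could use either:

import requests

BASE = 'https://lookyloo.example.org'  # hypothetical instance

# GET /submit: everything in the query string, the capture UUID comes back as text.
r = requests.get(f'{BASE}/submit', params={'url': 'https://www.circl.lu', 'listing': 1})
print(r.text)

# POST /submit: the same submission as a JSON body (see submit_fields_post).
r = requests.post(f'{BASE}/submit', json={'url': 'https://www.circl.lu', 'listing': 1})
print(r.text)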
@@ -447,30 +448,30 @@ class SubmitCapture(Resource):
 @api.route('/bin/<string:capture_uuid>/screenshot')
 @api.doc(description='Get the screenshot associated with the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureScreenshot(Resource):
+class CaptureScreenshot(Resource):  # type: ignore[misc]

-    @api.produces(['image/png'])
-    def get(self, capture_uuid: str):
+    @api.produces(['image/png'])  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> Response:
         return send_file(lookyloo.get_screenshot(capture_uuid), mimetype='image/png')


 @api.route('/bin/<string:capture_uuid>/export')
 @api.doc(description='Get all the files generated by the capture, except the pickle.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureExport(Resource):
+class CaptureExport(Resource):  # type: ignore[misc]

-    @api.produces(['application/zip'])
-    def get(self, capture_uuid: str):
+    @api.produces(['application/zip'])  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> Response:
         return send_file(lookyloo.get_capture(capture_uuid), mimetype='application/zip')


 @api.route('/bin/<string:capture_uuid>/data')
 @api.doc(description='Get the file downloaded by the capture.',
          params={'capture_uuid': 'The UUID of the capture'})
-class CaptureData(Resource):
+class CaptureData(Resource):  # type: ignore[misc]

-    @api.produces(['application/zip'])
-    def get(self, capture_uuid: str):
+    @api.produces(['application/zip'])  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> Response:
         filename, data = lookyloo.get_data(capture_uuid)
         if not filename:
             # This capture didn't trigger a download.
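The three binary endpoints above explain the new `-> Response` annotations: `send_file` wraps a file-like object in a Flask `Response`, with an explicit mimetype since there is no filename to guess from. A standalone sketch:

from io import BytesIO

from flask import Flask, Response, send_file

app = Flask(__name__)


@app.route('/bin/demo')
def demo() -> Response:
    # send_file accepts any file-like object; the mimetype tells the
    # client what it is receiving (a fake PNG header here).
    return send_file(BytesIO(b'\x89PNG\r\n\x1a\n'), mimetype='image/png')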
@@ -499,10 +500,10 @@ compare_captures_fields = api.model('CompareCapturesFields', {

 @api.route('/json/compare_captures')
 @api.doc(description='Compare two captures')
-class CompareCaptures(Resource):
-    @api.doc(body=compare_captures_fields)
-    def post(self):
-        parameters: Dict = request.get_json(force=True)
+class CompareCaptures(Resource):  # type: ignore[misc]
+    @api.doc(body=compare_captures_fields)  # type: ignore[misc]
+    def post(self) -> dict[str, Any]:
+        parameters: dict[str, Any] = request.get_json(force=True)
         left_uuid = parameters.get('capture_left')
         right_uuid = parameters.get('capture_right')
         if not left_uuid or not right_uuid:
@@ -545,10 +546,10 @@ comparables_model = api.model('ComparablesModel', {

 @api.route('/json/<string:capture_uuid>/comparables')
 @api.doc(description='Get the data we can compare across captures')
-class Comparables(Resource):
+class Comparables(Resource):  # type: ignore[misc]

-    @api.marshal_with(comparables_model)
-    def get(self, capture_uuid: str):
+    @api.marshal_with(comparables_model)  # type: ignore[misc]
+    def get(self, capture_uuid: str) -> dict[str, Any]:
         return comparator.get_comparables_capture(capture_uuid)

@@ -561,10 +562,10 @@ takedown_fields = api.model('TakedownFields', {

 @api.route('/json/takedown')
 @api.doc(description='Get information for triggering a takedown request')
-class Takedown(Resource):
-    @api.doc(body=takedown_fields)
-    def post(self):
-        parameters: Dict = request.get_json(force=True)
+class Takedown(Resource):  # type: ignore[misc]
+    @api.doc(body=takedown_fields)  # type: ignore[misc]
+    def post(self) -> list[dict[str, Any]] | dict[str, str]:
+        parameters: dict[str, Any] = request.get_json(force=True)
         capture_uuid = parameters.get('capture_uuid')
         if not capture_uuid:
             return {'error': f'Invalid request: {parameters}'}
@@ -576,10 +577,10 @@ class Takedown(Resource):
 @api.route('/admin/rebuild_all')
 @api.doc(description='Rebuild all the trees. WARNING: IT IS GOING TO TAKE A VERY LONG TIME.',
          security='apikey')
-class RebuildAll(Resource):
+class RebuildAll(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]

-    def post(self):
+    def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.rebuild_all()
         except Exception as e:
@@ -591,10 +592,10 @@ class RebuildAll(Resource):
 @api.route('/admin/rebuild_all_cache')
 @api.doc(description='Rebuild all the caches. It will take a while, but less than a full rebuild.',
          security='apikey')
-class RebuildAllCache(Resource):
+class RebuildAllCache(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]

-    def post(self):
+    def post(self) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.rebuild_cache()
         except Exception as e:
@@ -607,10 +608,10 @@ class RebuildAllCache(Resource):
 @api.doc(description='Rebuild the tree.',
          params={'capture_uuid': 'The UUID of the capture'},
          security='apikey')
-class CaptureRebuildTree(Resource):
+class CaptureRebuildTree(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]

-    def post(self, capture_uuid):
+    def post(self, capture_uuid: str) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.remove_pickle(capture_uuid)
             lookyloo.get_crawled_tree(capture_uuid)
@@ -624,10 +625,10 @@ class CaptureRebuildTree(Resource):
 @api.doc(description='Hide the capture from the index.',
          params={'capture_uuid': 'The UUID of the capture'},
          security='apikey')
-class CaptureHide(Resource):
+class CaptureHide(Resource):  # type: ignore[misc]
     method_decorators = [api_auth_check]

-    def post(self, capture_uuid):
+    def post(self, capture_uuid: str) -> dict[str, str] | tuple[dict[str, str], int]:
         try:
             lookyloo.hide_capture(capture_uuid)
         except Exception as e:
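All the admin resources above share `method_decorators = [api_auth_check]` and declare `security='apikey'`, so they only answer when a valid key is presented in the `Authorization` header (see `load_user_from_request` below). A hypothetical client call:

import requests

BASE = 'https://lookyloo.example.org'  # hypothetical instance
API_KEY = 'changeme'                   # hypothetical key

# Without a valid Authorization header, api_auth_check rejects the request.
r = requests.post(f'{BASE}/admin/rebuild_all_cache',
                  headers={'Authorization': API_KEY})
print(r.json())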
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Dict, List, Union

 import flask_login  # type: ignore
+from flask import Request
 from werkzeug.security import generate_password_hash

 from lookyloo.default import get_config, get_homedir
@@ -23,7 +24,7 @@ def get_lookyloo_instance() -> Lookyloo:
     return __global_lookyloo_instance


-def src_request_ip(request) -> str:
+def src_request_ip(request: Request) -> str | None:
     # NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers.
     real_ip = request.headers.get('X-Real-IP')
     if not real_ip:
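A self-contained version of the helper annotated above; the fallback to `request.remote_addr` sits outside this hunk, so treating it as part of the function is an assumption (it is the natural reason the return type is now `str | None`):

from __future__ import annotations

from flask import Request


def src_request_ip(request: Request) -> str | None:
    # X-Real-IP is set by the reverse proxy in front of the app;
    # without a proxy, the socket-level address is the best available.
    real_ip = request.headers.get('X-Real-IP')
    if not real_ip:
        real_ip = request.remote_addr  # assumed fallback, may be None
    return real_ip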
@@ -31,11 +32,11 @@ def src_request_ip(request) -> str:
     return real_ip


-class User(flask_login.UserMixin):
+class User(flask_login.UserMixin):  # type: ignore[misc]
     pass


-def load_user_from_request(request):
+def load_user_from_request(request: Request) -> User | None:
     api_key = request.headers.get('Authorization')
     if not api_key:
         return None
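`load_user_from_request` is the hook flask_login consults for header-based authentication. A minimal sketch of how such a loader is wired up, with the key-to-user lookup stubbed out (the real mapping comes from the instance configuration):

from __future__ import annotations

import flask_login  # type: ignore[import-untyped]
from flask import Flask, Request

app = Flask(__name__)
login_manager = flask_login.LoginManager()
login_manager.init_app(app)

KEYS: dict[str, str] = {'changeme': 'admin'}  # hypothetical key -> user id


class User(flask_login.UserMixin):  # type: ignore[misc]
    pass


@login_manager.request_loader
def load_user_from_request(request: Request) -> User | None:
    api_key = request.headers.get('Authorization')
    if not api_key:
        return None  # anonymous request, no Authorization header
    if (user_id := KEYS.get(api_key)) is not None:
        user = User()
        user.id = user_id  # UserMixin.get_id() reads this attribute
        return user
    return None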