mirror of https://github.com/CIRCL/lookyloo

new: Force indexing of a specific capture from the UI

This should also make the indexing a lot faster.

Branch: pull/937/head
parent 28e81a1eae
commit 1085932ad2
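In short, reading the hunks below: the pickle helpers (get_pickle_path, remove_pickle_tree, load_pickle_tree) move from the capturecache module into helpers, the per-capture indexing work is consolidated into a new Indexing.index_capture() method protected by a per-capture Redis lock, the background indexer now hands every non-archived capture to that method instead of building CrawledTree objects itself, and the web UI gains a /tree/<uuid>/trigger_indexing route plus an "Index capture" button shown when a tree is not fully indexed. A minimal sketch of the new entry point (the UUID and directory are placeholders, not values from this commit):

    from pathlib import Path

    from lookyloo import Indexing

    indexing = Indexing()
    # index_capture() takes the capture UUID and its on-disk directory, checks
    # what is already indexed, and only fills in the missing pieces.
    indexing.index_capture('00000000-0000-0000-0000-000000000000',
                           Path('/path/to/capture/directory'))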
@@ -4,14 +4,12 @@ from __future__ import annotations
 import logging
 import logging.config
 
+from pathlib import Path
+
 from redis import Redis
-from typing import Generator
 
-from lookyloo import Lookyloo, Indexing
+from lookyloo import Indexing
-from lookyloo.capturecache import get_pickle_path
 from lookyloo.default import AbstractManager, get_config, get_socket_path
-from lookyloo.exceptions import NoValidHarFile
 
 logging.config.dictConfig(get_config('logging'))
@@ -21,7 +19,6 @@ class BackgroundIndexer(AbstractManager):
 
     def __init__(self, full: bool=False, loglevel: int | None=None):
         super().__init__(loglevel)
-        self.lookyloo = Lookyloo(cache_max_size=1)
         self.is_public_instance = get_config('generic', 'public_instance')
         self.full_indexer = full
         self.indexing = Indexing(full_index=self.full_indexer)
@@ -35,66 +32,22 @@ class BackgroundIndexer(AbstractManager):
 
     def _to_run_forever(self) -> None:
         self._check_indexes()
-        # Don't need the cache in this class.
-        self.lookyloo.clear_tree_cache()
-
-    def _to_index_no_cache(self) -> Generator[tuple[tuple[bool, bool, bool, bool, bool, bool, bool, bool], str], None, None]:
-        # NOTE: only get the non-archived captures for now.
-        for uuid, directory in self.redis.hscan_iter('lookup_dirs'):
-            if not self.full_indexer:
-                # If we're not running the full indexer, check if the capture should be indexed.
-                if self.is_public_instance and self.redis.hexists(directory, 'no_index'):
-                    # Capture unindexed
-                    continue
-
-            if get_pickle_path(directory) is None:
-                # pickle isn't ready, we can't index.
-                continue
-            indexed = self.indexing.capture_indexed(uuid)
-            if all(indexed):
-                continue
-            yield indexed, uuid
 
     def _check_indexes(self) -> None:
-        if not self.indexing.can_index:
+        if not self.indexing.can_index():
             # There is no reason to run this method in multiple scripts.
             self.logger.info('Indexing already ongoing in another process.')
             return None
         self.logger.info(f'Check {self.script_name}...')
-        for indexed, uuid_to_index in self._to_index_no_cache():
-            try:
-                ct = self.lookyloo.get_crawled_tree(uuid_to_index)
-            except NoValidHarFile:
-                self.logger.warning(f'Broken pickle for {uuid_to_index}')
-                self.lookyloo.remove_pickle(uuid_to_index)
-                continue
-
-            if not indexed[0]:
-                self.logger.info(f'Indexing urls for {uuid_to_index}')
-                self.indexing.index_url_capture(ct)
-            if not indexed[1]:
-                self.logger.info(f'Indexing resources for {uuid_to_index}')
-                self.indexing.index_body_hashes_capture(ct)
-            if not indexed[2]:
-                self.logger.info(f'Indexing cookies for {uuid_to_index}')
-                self.indexing.index_cookies_capture(ct)
-            if not indexed[3]:
-                self.logger.info(f'Indexing HH Hashes for {uuid_to_index}')
-                self.indexing.index_http_headers_hashes_capture(ct)
-            if not indexed[4]:
-                self.logger.info(f'Indexing favicons for {uuid_to_index}')
-                favicons = self.lookyloo.get_potential_favicons(uuid_to_index, all_favicons=True, for_datauri=False)
-                self.indexing.index_favicons_capture(uuid_to_index, favicons)
-            if not indexed[5]:
-                self.logger.info(f'Indexing identifiers for {uuid_to_index}')
-                self.indexing.index_identifiers_capture(ct)
-            if not indexed[6]:
-                self.logger.info(f'Indexing categories for {uuid_to_index}')
-                categories = self.lookyloo.categories_capture(uuid_to_index)
-                self.indexing.index_categories_capture(uuid_to_index, categories)
-            if not indexed[7]:
-                self.logger.info(f'Indexing hash types for {uuid_to_index}')
-                self.indexing.index_capture_hashes_types(ct)
+        # NOTE: only get the non-archived captures for now.
+        for uuid, d in self.redis.hscan_iter('lookup_dirs'):
+            if not self.full_indexer:
+                # If we're not running the full indexer, check if the capture should be indexed.
+                if self.is_public_instance and self.redis.hexists(d, 'no_index'):
+                    # Capture unindexed
+                    continue
+            self.indexing.index_capture(uuid, Path(d))
         self.indexing.indexing_done()
         self.logger.info('... done.')
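For context, a hedged sketch of what the slimmed-down daemon loop above now does: it walks the non-archived captures listed in the lookup_dirs Redis hash and delegates everything else to Indexing.index_capture(). The Redis connection details below (a unix socket named 'cache') are an assumption about the deployment, not something stated in this diff.

    from pathlib import Path

    from redis import Redis

    from lookyloo import Indexing
    from lookyloo.default import get_socket_path

    redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)  # assumed socket name
    indexing = Indexing()

    for uuid, directory in redis.hscan_iter('lookup_dirs'):
        # Locking, "already indexed" checks and error handling all live inside index_capture().
        indexing.index_capture(uuid, Path(directory))
    indexing.indexing_done()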
@@ -16,8 +16,8 @@ import time
 
 from collections import OrderedDict
 from collections.abc import Mapping
 from datetime import datetime
-from functools import lru_cache, _CacheInfo as CacheInfo
+from functools import _CacheInfo as CacheInfo
-from logging import Logger, LoggerAdapter
+from logging import LoggerAdapter
 from pathlib import Path
 from typing import Any, MutableMapping, Iterator
@@ -28,7 +28,7 @@ from pyipasnhistory import IPASNHistory  # type: ignore[attr-defined]
 
 from redis import Redis
 
 from .context import Context
-from .helpers import get_captures_dir, is_locked
+from .helpers import get_captures_dir, is_locked, load_pickle_tree, get_pickle_path, remove_pickle_tree
 from .indexing import Indexing
 from .default import LookylooException, try_make_file, get_config
 from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, TreeNeedsRebuild
@@ -106,63 +106,6 @@ class CaptureCache():
         return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime, self.logger)
 
 
-def get_pickle_path(capture_dir: Path | str) -> Path | None:
-    if isinstance(capture_dir, str):
-        capture_dir = Path(capture_dir)
-    pickle_file_gz = capture_dir / 'tree.pickle.gz'
-    if pickle_file_gz.exists():
-        return pickle_file_gz
-
-    pickle_file = capture_dir / 'tree.pickle'
-    if pickle_file.exists():
-        return pickle_file
-
-    return None
-
-
-def remove_pickle_tree(capture_dir: Path) -> None:
-    pickle_path = get_pickle_path(capture_dir)
-    if pickle_path and pickle_path.exists():
-        pickle_path.unlink()
-
-
-@lru_cache(maxsize=64)
-def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> CrawledTree:
-    pickle_path = get_pickle_path(capture_dir)
-    tree = None
-    try:
-        if pickle_path:
-            if pickle_path.suffix == '.gz':
-                with gzip.open(pickle_path, 'rb') as _pg:
-                    tree = pickle.load(_pg)
-            else:  # not a GZ pickle
-                with pickle_path.open('rb') as _p:
-                    tree = pickle.load(_p)
-    except pickle.UnpicklingError:
-        remove_pickle_tree(capture_dir)
-    except EOFError:
-        remove_pickle_tree(capture_dir)
-    except Exception:
-        logger.exception('Unexpected exception when unpickling.')
-        remove_pickle_tree(capture_dir)
-
-    if tree:
-        try:
-            if tree.root_hartree.har.path.exists():
-                return tree
-            else:
-                # The capture was moved.
-                remove_pickle_tree(capture_dir)
-        except Exception as e:
-            logger.warning(f'The pickle is broken, removing: {e}')
-            remove_pickle_tree(capture_dir)
-
-    if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
-        raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
-    # The tree doesn't need to be rebuilt if there are no HAR files.
-    raise NoValidHarFile("Couldn't find HAR files")
-
-
 def serialize_sets(obj: Any) -> Any:
     if isinstance(obj, set):
         return list(obj)
@@ -3,10 +3,12 @@
 
 from __future__ import annotations
 
 import configparser
+import gzip
 import hashlib
 import json
 import logging
 import os
+import pickle
 import re
 import time
@@ -14,10 +16,11 @@ from datetime import datetime, timedelta, date
 
 from functools import lru_cache, cache
 from importlib.metadata import version
 from io import BufferedIOBase
+from logging import Logger
 from pathlib import Path
 from pydantic import field_validator
 from pydantic_core import from_json
-from typing import Any
+from typing import Any, TYPE_CHECKING
 from urllib.parse import urlparse
@@ -31,9 +34,10 @@ from werkzeug.user_agent import UserAgent
 
 from werkzeug.utils import cached_property
 
 from .default import get_homedir, safe_create_dir, get_config, LookylooException
-from .indexing import Indexing
+from .exceptions import NoValidHarFile, TreeNeedsRebuild
-# from .exceptions import InvalidCaptureSetting
+
+if TYPE_CHECKING:
+    from .indexing import Indexing
 
 logger = logging.getLogger('Lookyloo - Helpers')
@@ -441,8 +445,66 @@ def load_user_config(username: str) -> dict[str, Any] | None:
 
 
 @cache
 def get_indexing(full: bool=False) -> Indexing:
+    from .indexing import Indexing
     if not get_config('generic', 'index_everything'):
         return Indexing()
     if full:
         return Indexing(full_index=True)
     return Indexing()
 
 
+def get_pickle_path(capture_dir: Path | str) -> Path | None:
+    if isinstance(capture_dir, str):
+        capture_dir = Path(capture_dir)
+    pickle_file_gz = capture_dir / 'tree.pickle.gz'
+    if pickle_file_gz.exists():
+        return pickle_file_gz
+
+    pickle_file = capture_dir / 'tree.pickle'
+    if pickle_file.exists():
+        return pickle_file
+
+    return None
+
+
+def remove_pickle_tree(capture_dir: Path) -> None:
+    pickle_path = get_pickle_path(capture_dir)
+    if pickle_path and pickle_path.exists():
+        pickle_path.unlink()
+
+
+@lru_cache(maxsize=64)
+def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> CrawledTree:
+    pickle_path = get_pickle_path(capture_dir)
+    tree = None
+    try:
+        if pickle_path:
+            if pickle_path.suffix == '.gz':
+                with gzip.open(pickle_path, 'rb') as _pg:
+                    tree = pickle.load(_pg)
+            else:  # not a GZ pickle
+                with pickle_path.open('rb') as _p:
+                    tree = pickle.load(_p)
+    except pickle.UnpicklingError:
+        remove_pickle_tree(capture_dir)
+    except EOFError:
+        remove_pickle_tree(capture_dir)
+    except Exception:
+        logger.exception('Unexpected exception when unpickling.')
+        remove_pickle_tree(capture_dir)
+
+    if tree:
+        try:
+            if tree.root_hartree.har.path.exists():
+                return tree
+            else:
+                # The capture was moved.
+                remove_pickle_tree(capture_dir)
+        except Exception as e:
+            logger.warning(f'The pickle is broken, removing: {e}')
+            remove_pickle_tree(capture_dir)
+
+    if list(capture_dir.rglob('*.har')) or list(capture_dir.rglob('*.har.gz')):
+        raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
+    # The tree doesn't need to be rebuilt if there are no HAR files.
+    raise NoValidHarFile("Couldn't find HAR files")
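The three helpers added above are the same functions removed from the capturecache module earlier in this commit; relocating them lets the Indexing class load a tree without going through the capture cache. A small usage sketch, assuming the module layout implied by the imports in this diff (the capture path is a placeholder):

    import logging

    from pathlib import Path

    from lookyloo.helpers import get_pickle_path, load_pickle_tree

    logger = logging.getLogger(__name__)
    capture_dir = Path('/path/to/a/capture')  # placeholder

    if get_pickle_path(capture_dir) is not None:
        # The directory mtime is part of the lru_cache key, so a rebuilt pickle
        # (newer mtime) is not served from a stale cached entry.
        tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime, logger)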
@@ -15,13 +15,15 @@ import mmh3
 
 from bs4 import BeautifulSoup
 from hashlib import sha256
+from pathlib import Path
 
 from har2tree import CrawledTree
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
 
+from .exceptions import NoValidHarFile, TreeNeedsRebuild
+from .helpers import load_pickle_tree
 from .default import get_socket_path, get_config
-# from .helpers import get_public_suffix_list
 
 
 class Indexing():
@@ -53,11 +55,16 @@ class Indexing():
     def redis(self) -> Redis:  # type: ignore[type-arg]
         return Redis(connection_pool=self.__redis_pool)
 
-    @property
-    def can_index(self) -> bool:
+    def can_index(self, capture_uuid: str | None=None) -> bool:
+        if capture_uuid:
+            return bool(self.redis.set(f'ongoing_indexing|{capture_uuid}', 1, ex=360, nx=True))
+
         return bool(self.redis.set('ongoing_indexing', 1, ex=3600, nx=True))
 
-    def indexing_done(self) -> None:
-        self.redis.delete('ongoing_indexing')
+    def indexing_done(self, capture_uuid: str | None=None) -> None:
+        if capture_uuid:
+            self.redis.delete(f'ongoing_indexing|{capture_uuid}')
+        else:
+            self.redis.delete('ongoing_indexing')
 
     def force_reindex(self, capture_uuid: str) -> None:
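can_index() now doubles as lock acquisition: with a capture UUID it takes a per-capture lock (ongoing_indexing|<uuid>, 360s expiry), without one it takes the global lock used by the background indexer (3600s expiry), and indexing_done() releases whichever was taken. A standalone sketch of that SET NX EX pattern, assuming a reachable local Redis and a made-up UUID:

    from redis import Redis

    redis = Redis(decode_responses=True)  # assumes a local default Redis
    capture_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder

    # SET with nx=True only succeeds if the key does not exist yet, so only one
    # process can hold the lock; ex=360 ensures a crashed worker cannot keep
    # the capture locked forever.
    if redis.set(f'ongoing_indexing|{capture_uuid}', 1, ex=360, nx=True):
        try:
            pass  # index the capture here
        finally:
            redis.delete(f'ongoing_indexing|{capture_uuid}')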
@@ -91,6 +98,55 @@ class Indexing():
         # This call for sure returns a tuple of 7 booleans
         return tuple(to_return)  # type: ignore[return-value]
 
+    def index_capture(self, uuid_to_index: str, directory: Path) -> None:
+        if not self.can_index(uuid_to_index):
+            self.logger.info(f'Indexing on {uuid_to_index} ongoing, skipping. ')
+            return
+
+        try:
+            indexed = self.capture_indexed(uuid_to_index)
+            if all(indexed):
+                return
+
+            if not any((directory / pickle_name).exists()
+                       for pickle_name in ['tree.pickle.gz', 'tree.pickle']):
+                self.logger.warning(f'No pickle for {uuid_to_index}, skipping. ')
+                return
+
+            # do the indexing
+            ct = load_pickle_tree(directory, directory.stat().st_mtime, self.logger)
+            if not indexed[0]:
+                self.logger.info(f'Indexing urls for {uuid_to_index}')
+                self.index_url_capture(ct)
+            if not indexed[1]:
+                self.logger.info(f'Indexing resources for {uuid_to_index}')
+                self.index_body_hashes_capture(ct)
+            if not indexed[2]:
+                self.logger.info(f'Indexing cookies for {uuid_to_index}')
+                self.index_cookies_capture(ct)
+            if not indexed[3]:
+                self.logger.info(f'Indexing HH Hashes for {uuid_to_index}')
+                self.index_http_headers_hashes_capture(ct)
+            if not indexed[4]:
+                self.logger.info(f'Indexing favicons for {uuid_to_index}')
+                self.index_favicons_capture(uuid_to_index, directory)
+            if not indexed[5]:
+                self.logger.info(f'Indexing identifiers for {uuid_to_index}')
+                self.index_identifiers_capture(ct)
+            if not indexed[6]:
+                self.logger.info(f'Indexing categories for {uuid_to_index}')
+                self.index_categories_capture(uuid_to_index, directory)
+            if not indexed[7]:
+                self.logger.info(f'Indexing hash types for {uuid_to_index}')
+                self.index_capture_hashes_types(ct)
+
+        except (TreeNeedsRebuild, NoValidHarFile) as e:
+            self.logger.warning(f'Error loading the pickle for {uuid_to_index}: {e}')
+        except Exception as e:
+            self.logger.warning(f'Error during indexing for {uuid_to_index}: {e}')
+        finally:
+            self.indexing_done(uuid_to_index)
+
     # ###### Cookies ######
 
     @property
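index_capture() is the piece that makes on-demand indexing from the UI cheap: it grabs the per-capture lock, bails out if everything is already indexed or if no pickle exists yet, loads the tree through load_pickle_tree(), runs only the missing index steps, and always releases the lock in the finally block. Called directly it looks roughly like this (UUID and directory layout are placeholders, not values from the commit):

    from pathlib import Path

    from lookyloo.indexing import Indexing

    indexing = Indexing()
    capture_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder
    capture_dir = Path('/path/to/captures') / capture_uuid  # placeholder layout

    print(indexing.capture_indexed(capture_uuid))  # tuple of booleans, one per index
    indexing.index_capture(capture_uuid, capture_dir)
    # Safe to call repeatedly: a later call is a no-op once capture_indexed()
    # reports every index as present.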
@@ -349,18 +405,16 @@ class Indexing():
     def favicon_number_captures(self, favicon_sha512: str) -> int:
         return self.redis.scard(f'favicons|{favicon_sha512}|captures')
 
-    def index_favicons_capture(self, capture_uuid: str, favicons: BytesIO) -> None:
+    def index_favicons_capture(self, capture_uuid: str, capture_dir: Path) -> None:
         if self.redis.sismember('indexed_favicons', capture_uuid):
             # Do not reindex
             return
         self.redis.sadd('indexed_favicons', capture_uuid)
         self.logger.debug(f'Indexing favicons for {capture_uuid} ... ')
         pipeline = self.redis.pipeline()
-        with ZipFile(favicons, 'r') as myzip:
-            for name in myzip.namelist():
-                if not name.endswith('.ico'):
-                    continue
-                favicon = myzip.read(name)
+        for favicon_path in sorted(list(capture_dir.glob('*.potential_favicons.ico'))):
+            with favicon_path.open('rb') as f:
+                favicon = f.read()
                 if not favicon:
                     # Empty file, ignore.
                     continue
@@ -552,11 +606,20 @@ class Indexing():
     def categories(self) -> set[str]:
         return self.redis.smembers('categories')
 
-    def index_categories_capture(self, capture_uuid: str, capture_categories: list[str]) -> None:
+    def index_categories_capture(self, capture_uuid: str, capture_dir: Path) -> None:
         if self.redis.sismember('indexed_categories', capture_uuid):
             # do not reindex
             return
+        # Make sure we don't reindex
         self.redis.sadd('indexed_categories', capture_uuid)
+
+        categ_file = capture_dir / 'categories'
+        if categ_file.exists():
+            with categ_file.open('r') as f:
+                capture_categories = [c.strip() for c in f.readlines()]
+        else:
+            return
+
         added_in_existing_categories = set()
         pipeline = self.redis.pipeline()
         for c in self.categories:
@@ -1142,6 +1142,14 @@ def send_mail(tree_uuid: str) -> WerkzeugResponse:
     return redirect(url_for('tree', tree_uuid=tree_uuid))
 
 
+@app.route('/tree/<string:tree_uuid>/trigger_indexing', methods=['POST', 'GET'])
+def trigger_indexing(tree_uuid: str) -> WerkzeugResponse:
+    cache = lookyloo.capture_cache(tree_uuid)
+    if cache and hasattr(cache, 'capture_dir'):
+        get_indexing(flask_login.current_user).index_capture(tree_uuid, cache.capture_dir)
+    return redirect(url_for('tree', tree_uuid=tree_uuid))
+
+
 @app.route('/tree/<string:tree_uuid>', methods=['GET'])
 @app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
 def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | WerkzeugResponse:
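This new route is what the "Index capture" button in the tree template points at, and it can also be hit from a script. A hedged example, assuming a Lookyloo instance listening on http://127.0.0.1:5100 (an assumed development address, not stated in this diff) and a made-up capture UUID:

    import requests

    instance = 'http://127.0.0.1:5100'  # assumed local instance
    capture_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder

    # GET or POST both work according to the route definition above; the view
    # redirects back to the tree page once indexing has been triggered.
    r = requests.get(f'{instance}/tree/{capture_uuid}/trigger_indexing', allow_redirects=False)
    print(r.status_code)  # expect a 302 redirect to /tree/<uuid>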
@@ -1199,6 +1207,11 @@ def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | WerkzeugResponse:
             monitoring_collections = []
             flash(f'Unable to get existing connections from the monitoring : {e}', 'warning')
 
+    # Check if the capture has been indexed yet. Print a warning if not.
+    capture_indexed = all(get_indexing(flask_login.current_user).capture_indexed(tree_uuid))
+    if not capture_indexed:
+        flash('The capture has not been indexed yet. Some correlations will be missing.', 'warning')
+
     return render_template('tree.html', tree_json=ct.to_json(),
                            info=cache,
                            tree_uuid=tree_uuid, public_domain=lookyloo.public_domain,
|
||||||
confirm_message=confirm_message if confirm_message else 'Tick to confirm.',
|
confirm_message=confirm_message if confirm_message else 'Tick to confirm.',
|
||||||
parent_uuid=cache.parent,
|
parent_uuid=cache.parent,
|
||||||
has_redirects=True if cache.redirects else False,
|
has_redirects=True if cache.redirects else False,
|
||||||
|
capture_indexed=capture_indexed,
|
||||||
capture_settings=capture_settings.model_dump(exclude_none=True) if capture_settings else {})
|
capture_settings=capture_settings.model_dump(exclude_none=True) if capture_settings else {})
|
||||||
|
|
||||||
except NoValidHarFile:
|
except NoValidHarFile:
|
||||||
|
|
|
@@ -407,6 +407,10 @@
   <div id="tools-menu" class="dropdown">
     <button class="dropbtn">Analytical Tools</button>
     <div id="tools-menu-content" class="dropdown-content">
+      {% if not capture_indexed %}
+      <a href="{{ url_for('trigger_indexing', tree_uuid=tree_uuid) }}" role="button" class="btn btn-warning"
+         title="The capture isn't (fully) indexed, index now.">Index capture</a>
+      {% endif %}
       <a href="#modulesModal" data-remote="{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=False) }}"
          data-bs-toggle="modal" data-bs-target="#modulesModal" role="button"
          title="Lookups from supported 3rd party services">Third Party Reports</a>