chg: More cleanup to use the new caching system as it should be.

pull/267/head
Raphaël Vinot 2021-09-27 11:36:27 +02:00
parent d05b1edf48
commit 681e136ef4
7 changed files with 54 additions and 91 deletions

View File

@ -176,11 +176,11 @@ class CapturesIndex(Mapping):
to_return = Path(capture_dir) to_return = Path(capture_dir)
if to_return.exists(): if to_return.exists():
return to_return return to_return
self.redis.hdel('lookup_dirs_archived', uuid)
# The capture was removed, remove the UUID # The capture was removed, remove the UUID
self.redis.hdel('lookup_dirs_archived', uuid)
self.redis.delete(capture_dir)
self.logger.warning(f'UUID ({uuid}) linked to a missing directory ({capture_dir}).') self.logger.warning(f'UUID ({uuid}) linked to a missing directory ({capture_dir}).')
raise MissingCaptureDirectory(f'UUID ({uuid}) linked to a missing directory ({capture_dir}).') raise MissingCaptureDirectory(f'UUID ({uuid}) linked to a missing directory ({capture_dir}).')
raise MissingUUID(f'Unable to find UUID {uuid}.') raise MissingUUID(f'Unable to find UUID {uuid}.')
def _create_pickle(self, capture_dir: Path) -> CrawledTree: def _create_pickle(self, capture_dir: Path) -> CrawledTree:

View File

@ -27,7 +27,7 @@ from werkzeug.useragents import UserAgent
from .capturecache import CaptureCache, CapturesIndex from .capturecache import CaptureCache, CapturesIndex
from .context import Context from .context import Context
from .exceptions import (LookylooException, MissingCaptureDirectory, from .exceptions import (LookylooException, MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild) MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_config, from .helpers import (CaptureStatus, get_captures_dir, get_config,
get_email_template, get_homedir, get_resources_hashes, get_email_template, get_homedir, get_resources_hashes,
get_socket_path, get_splash_url, get_taxonomies, uniq_domains) get_socket_path, get_splash_url, get_taxonomies, uniq_domains)
@ -85,10 +85,6 @@ class Lookyloo():
def redis(self): def redis(self):
return Redis(connection_pool=self.redis_pool) return Redis(connection_pool=self.redis_pool)
def _get_capture_dir(self, capture_uuid: str, /) -> Path:
'''Use the cache to get a capture directory from a capture UUID'''
return self._captures_index[capture_uuid].capture_dir
def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str, def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str,
legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]): legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
'''Adds context information to a capture or a URL node''' '''Adds context information to a capture or a URL node'''
@ -142,8 +138,7 @@ class Lookyloo():
def get_meta(self, capture_uuid: str, /) -> Dict[str, str]: def get_meta(self, capture_uuid: str, /) -> Dict[str, str]:
'''Get the meta informations from a capture (mostly, details about the User Agent used.)''' '''Get the meta informations from a capture (mostly, details about the User Agent used.)'''
capture_dir = self._get_capture_dir(capture_uuid) metafile = self._captures_index[capture_uuid].capture_dir / 'meta'
metafile = capture_dir / 'meta'
if metafile.exists(): if metafile.exists():
with metafile.open('r') as f: with metafile.open('r') as f:
return json.load(f) return json.load(f)
@ -171,10 +166,10 @@ class Lookyloo():
def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]: def categories_capture(self, capture_uuid: str, /) -> Dict[str, Any]:
'''Get all the categories related to a capture, in MISP Taxonomies format''' '''Get all the categories related to a capture, in MISP Taxonomies format'''
capture_dir = self._get_capture_dir(capture_uuid) categ_file = self._captures_index[capture_uuid].capture_dir / 'categories'
# get existing categories if possible # get existing categories if possible
if (capture_dir / 'categories').exists(): if categ_file.exists():
with (capture_dir / 'categories').open() as f: with categ_file.open() as f:
current_categories = [line.strip() for line in f.readlines()] current_categories = [line.strip() for line in f.readlines()]
return {e: self.taxonomies.revert_machinetag(e) for e in current_categories} return {e: self.taxonomies.revert_machinetag(e) for e in current_categories}
return {} return {}
@ -186,30 +181,30 @@ class Lookyloo():
# Make sure the category is mappable to a taxonomy. # Make sure the category is mappable to a taxonomy.
self.taxonomies.revert_machinetag(category) self.taxonomies.revert_machinetag(category)
capture_dir = self._get_capture_dir(capture_uuid) categ_file = self._captures_index[capture_uuid].capture_dir / 'categories'
# get existing categories if possible # get existing categories if possible
if (capture_dir / 'categories').exists(): if categ_file.exists():
with (capture_dir / 'categories').open() as f: with categ_file.open() as f:
current_categories = set(line.strip() for line in f.readlines()) current_categories = set(line.strip() for line in f.readlines())
else: else:
current_categories = set() current_categories = set()
current_categories.add(category) current_categories.add(category)
with (capture_dir / 'categories').open('w') as f: with categ_file.open('w') as f:
f.writelines(f'{t}\n' for t in current_categories) f.writelines(f'{t}\n' for t in current_categories)
def uncategorize_capture(self, capture_uuid: str, /, category: str) -> None: def uncategorize_capture(self, capture_uuid: str, /, category: str) -> None:
'''Remove a category (MISP Taxonomy tag) from a capture.''' '''Remove a category (MISP Taxonomy tag) from a capture.'''
if not get_config('generic', 'enable_categorization'): if not get_config('generic', 'enable_categorization'):
return return
capture_dir = self._get_capture_dir(capture_uuid) categ_file = self._captures_index[capture_uuid].capture_dir / 'categories'
# get existing categories if possible # get existing categories if possible
if (capture_dir / 'categories').exists(): if categ_file.exists():
with (capture_dir / 'categories').open() as f: with categ_file.open() as f:
current_categories = set(line.strip() for line in f.readlines()) current_categories = set(line.strip() for line in f.readlines())
else: else:
current_categories = set() current_categories = set()
current_categories.remove(category) current_categories.remove(category)
with (capture_dir / 'categories').open('w') as f: with categ_file.open('w') as f:
f.writelines(f'{t}\n' for t in current_categories) f.writelines(f'{t}\n' for t in current_categories)
def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> Dict: def trigger_modules(self, capture_uuid: str, /, force: bool=False, auto_trigger: bool=False) -> Dict:
@ -283,23 +278,19 @@ class Lookyloo():
"""Add the capture in the hidden pool (not shown on the front page) """Add the capture in the hidden pool (not shown on the front page)
NOTE: it won't remove the correlations until they are rebuilt. NOTE: it won't remove the correlations until they are rebuilt.
""" """
self.redis.hset(str(self._get_capture_dir(capture_uuid)), 'no_index', 1) capture_dir = self._captures_index[capture_uuid].capture_dir
(self._get_capture_dir(capture_uuid) / 'no_index').touch() self.redis.hset(str(capture_dir), 'no_index', 1)
(capture_dir / 'no_index').touch()
self._captures_index.reload_cache(capture_uuid) self._captures_index.reload_cache(capture_uuid)
def update_tree_cache_info(self, process_id: int, classname: str) -> None: def update_tree_cache_info(self, process_id: int, classname: str) -> None:
self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status())) self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status()))
@property
def capture_uuids(self) -> List[str]:
'''All the capture UUIDs present in the cache.'''
return self.redis.hkeys('lookup_dirs')
def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None) -> List[CaptureCache]: def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None) -> List[CaptureCache]:
'''Get all the captures in the cache, sorted by timestamp (new -> old).''' '''Get all the captures in the cache, sorted by timestamp (new -> old).'''
if capture_uuids is None: if capture_uuids is None:
# Sort all captures # Sort all recent captures
capture_uuids = self.capture_uuids capture_uuids = self.redis.hkeys('lookup_dirs')
if not capture_uuids: if not capture_uuids:
# No captures at all on the instance # No captures at all on the instance
return [] return []
@ -309,6 +300,7 @@ class Lookyloo():
return all_cache return all_cache
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus: def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
'''Returns the status (queued, ongoing, done, or UUID unknown)'''
if self.redis.zrank('to_capture', capture_uuid) is not None: if self.redis.zrank('to_capture', capture_uuid) is not None:
return CaptureStatus.QUEUED return CaptureStatus.QUEUED
elif self.redis.hexists('lookup_dirs', capture_uuid): elif self.redis.hexists('lookup_dirs', capture_uuid):
@ -318,6 +310,7 @@ class Lookyloo():
return CaptureStatus.UNKNOWN return CaptureStatus.UNKNOWN
def try_error_status(self, capture_uuid: str, /) -> Optional[str]: def try_error_status(self, capture_uuid: str, /) -> Optional[str]:
'''If it is not possible to do the capture, we store the error for a short amount of time'''
return self.redis.get(f'error_{capture_uuid}') return self.redis.get(f'error_{capture_uuid}')
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]: def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]:
@ -351,7 +344,7 @@ class Lookyloo():
def enqueue_capture(self, query: MutableMapping[str, Any], source: str, user: str, authenticated: bool) -> str: def enqueue_capture(self, query: MutableMapping[str, Any], source: str, user: str, authenticated: bool) -> str:
'''Enqueue a query in the capture queue (used by the UI and the API for asynchronous processing)''' '''Enqueue a query in the capture queue (used by the UI and the API for asynchronous processing)'''
def _get_priority(source: str, user: str, authenticated: bool) -> int: def get_priority(source: str, user: str, authenticated: bool) -> int:
src_prio: int = self._priority['sources'][source] if source in self._priority['sources'] else -1 src_prio: int = self._priority['sources'][source] if source in self._priority['sources'] else -1
if not authenticated: if not authenticated:
usr_prio = self._priority['users']['_default_anon'] usr_prio = self._priority['users']['_default_anon']
@ -364,7 +357,7 @@ class Lookyloo():
usr_prio = self._priority['users'][user] if self._priority['users'].get(user) else self._priority['users']['_default_auth'] usr_prio = self._priority['users'][user] if self._priority['users'].get(user) else self._priority['users']['_default_auth']
return src_prio + usr_prio return src_prio + usr_prio
priority = _get_priority(source, user, authenticated) priority = get_priority(source, user, authenticated)
perma_uuid = str(uuid4()) perma_uuid = str(uuid4())
p = self.redis.pipeline() p = self.redis.pipeline()
for key, value in query.items(): for key, value in query.items():
@ -427,7 +420,7 @@ class Lookyloo():
def _get_raw(self, capture_uuid: str, /, extension: str='*', all_files: bool=True) -> BytesIO: def _get_raw(self, capture_uuid: str, /, extension: str='*', all_files: bool=True) -> BytesIO:
'''Get file(s) from the capture directory''' '''Get file(s) from the capture directory'''
try: try:
capture_dir = self._get_capture_dir(capture_uuid) capture_dir = self._captures_index[capture_uuid].capture_dir
except MissingUUID: except MissingUUID:
return BytesIO(f'Capture {capture_uuid} not unavailable, try again later.'.encode()) return BytesIO(f'Capture {capture_uuid} not unavailable, try again later.'.encode())
except MissingCaptureDirectory: except MissingCaptureDirectory:
@ -607,6 +600,10 @@ class Lookyloo():
except IndexError: except IndexError:
# unable to find the uuid, the cache is probably in a weird state. # unable to find the uuid, the cache is probably in a weird state.
return None return None
except NoValidHarFile as e:
# something went poorly when rebuilding the tree (probably a recursive error)
self.logger.warning(e)
return None
if url.empty_response: if url.empty_response:
return None return None
if not h or h == url.body_hash: if not h or h == url.body_hash:
@ -800,7 +797,7 @@ class Lookyloo():
def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]: def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:
'''Gather all the informations needed to display the Hostnode investigator popup.''' '''Gather all the informations needed to display the Hostnode investigator popup.'''
def _normalize_known_content(h: str, /, known_content: Dict[str, Any], url: URLNode) -> Tuple[Optional[Union[str, List[Any]]], Optional[Tuple[bool, Any]]]: def normalize_known_content(h: str, /, known_content: Dict[str, Any], url: URLNode) -> Tuple[Optional[Union[str, List[Any]]], Optional[Tuple[bool, Any]]]:
''' There are a few different sources to figure out known vs. legitimate content, ''' There are a few different sources to figure out known vs. legitimate content,
this method normalize it for the web interface.''' this method normalize it for the web interface.'''
known: Optional[Union[str, List[Any]]] = None known: Optional[Union[str, List[Any]]] = None
@ -861,13 +858,13 @@ class Lookyloo():
if freq_embedded['hash_freq'] > 1: if freq_embedded['hash_freq'] > 1:
to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid) to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid)
for h in to_append['embedded_ressources'].keys(): for h in to_append['embedded_ressources'].keys():
known, legitimate = _normalize_known_content(h, known_content, url) known, legitimate = normalize_known_content(h, known_content, url)
if known: if known:
to_append['embedded_ressources'][h]['known_content'] = known to_append['embedded_ressources'][h]['known_content'] = known
elif legitimate: elif legitimate:
to_append['embedded_ressources'][h]['legitimacy'] = legitimate to_append['embedded_ressources'][h]['legitimacy'] = legitimate
known, legitimate = _normalize_known_content(url.body_hash, known_content, url) known, legitimate = normalize_known_content(url.body_hash, known_content, url)
if known: if known:
to_append['known_content'] = known to_append['known_content'] = known
elif legitimate: elif legitimate:

32
poetry.lock generated
View File

@ -277,7 +277,7 @@ python-versions = "*"
[[package]] [[package]]
name = "filetype" name = "filetype"
version = "1.0.7" version = "1.0.8"
description = "Infer file type and MIME type of any file/buffer. No external dependencies." description = "Infer file type and MIME type of any file/buffer. No external dependencies."
category = "main" category = "main"
optional = false optional = false
@ -437,7 +437,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"]
[[package]] [[package]]
name = "ipython" name = "ipython"
version = "7.27.0" version = "7.28.0"
description = "IPython: Productive Interactive Computing" description = "IPython: Productive Interactive Computing"
category = "dev" category = "dev"
optional = false optional = false
@ -976,7 +976,7 @@ use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]] [[package]]
name = "rich" name = "rich"
version = "10.10.0" version = "10.11.0"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "main" category = "main"
optional = false optional = false
@ -1199,7 +1199,7 @@ python-versions = "*"
[[package]] [[package]]
name = "types-requests" name = "types-requests"
version = "2.25.8" version = "2.25.9"
description = "Typing stubs for requests" description = "Typing stubs for requests"
category = "dev" category = "dev"
optional = false optional = false
@ -1223,7 +1223,7 @@ python-versions = "*"
[[package]] [[package]]
name = "urllib3" name = "urllib3"
version = "1.26.6" version = "1.26.7"
description = "HTTP library with thread-safe connection pooling, file post, and more." description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "main" category = "main"
optional = false optional = false
@ -1317,7 +1317,7 @@ misp = ["python-magic", "pydeep"]
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.8" python-versions = "^3.8"
content-hash = "bd09b20f35c4a361a109abcf950326519706a5938c436e9ac6f2e07ef0e782c2" content-hash = "2b182690d3f0bb7438ded19e043d0c6aab7870e5460f4e4942a56c626d8fed66"
[metadata.files] [metadata.files]
aiohttp = [ aiohttp = [
@ -1533,8 +1533,8 @@ ete3 = [
{file = "ete3-3.1.2.tar.gz", hash = "sha256:4fc987b8c529889d6608fab1101f1455cb5cbd42722788de6aea9c7d0a8e59e9"}, {file = "ete3-3.1.2.tar.gz", hash = "sha256:4fc987b8c529889d6608fab1101f1455cb5cbd42722788de6aea9c7d0a8e59e9"},
] ]
filetype = [ filetype = [
{file = "filetype-1.0.7-py2.py3-none-any.whl", hash = "sha256:353369948bb1c09b8b3ea3d78390b5586e9399bff9aab894a1dff954e31a66f6"}, {file = "filetype-1.0.8-py2.py3-none-any.whl", hash = "sha256:eb974519c5dbbd678a9fbfb5e2616105c6768ee9c01ce4a4ecaefc141e50d5e5"},
{file = "filetype-1.0.7.tar.gz", hash = "sha256:da393ece8d98b47edf2dd5a85a2c8733e44b769e32c71af4cd96ed8d38d96aa7"}, {file = "filetype-1.0.8.tar.gz", hash = "sha256:77df14879b73fd9711b8bd4f465dadf2ecdafff0eac3b22c0bdb0ccba68db316"},
] ]
flask = [ flask = [
{file = "Flask-2.0.1-py3-none-any.whl", hash = "sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9"}, {file = "Flask-2.0.1-py3-none-any.whl", hash = "sha256:a6209ca15eb63fc9385f38e452704113d679511d9574d09b2cf9183ae7d20dc9"},
@ -1623,8 +1623,8 @@ incremental = [
{file = "incremental-21.3.0.tar.gz", hash = "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57"}, {file = "incremental-21.3.0.tar.gz", hash = "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57"},
] ]
ipython = [ ipython = [
{file = "ipython-7.27.0-py3-none-any.whl", hash = "sha256:75b5e060a3417cf64f138e0bb78e58512742c57dc29db5a5058a2b1f0c10df02"}, {file = "ipython-7.28.0-py3-none-any.whl", hash = "sha256:f16148f9163e1e526f1008d7c8d966d9c15600ca20d1a754287cf96d00ba6f1d"},
{file = "ipython-7.27.0.tar.gz", hash = "sha256:58b55ebfdfa260dad10d509702dc2857cb25ad82609506b070cf2d7b7df5af13"}, {file = "ipython-7.28.0.tar.gz", hash = "sha256:2097be5c814d1b974aea57673176a924c4c8c9583890e7a5f082f547b9975b11"},
] ]
itemadapter = [ itemadapter = [
{file = "itemadapter-0.4.0-py3-none-any.whl", hash = "sha256:695809a4e2f42174f0392dd66c2ceb2b2454d3ebbf65a930e5c85910d8d88d8f"}, {file = "itemadapter-0.4.0-py3-none-any.whl", hash = "sha256:695809a4e2f42174f0392dd66c2ceb2b2454d3ebbf65a930e5c85910d8d88d8f"},
@ -2061,8 +2061,8 @@ requests = [
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
] ]
rich = [ rich = [
{file = "rich-10.10.0-py3-none-any.whl", hash = "sha256:0b8cbcb0b8d476a7f002feaed9f35e51615f673c6c291d76ddf0c555574fd3c7"}, {file = "rich-10.11.0-py3-none-any.whl", hash = "sha256:44bb3f9553d00b3c8938abf89828df870322b9ba43caf3b12bb7758debdc6dec"},
{file = "rich-10.10.0.tar.gz", hash = "sha256:bacf58b25fea6b920446fe4e7abdc6c7664c4530c4098e7a1bc79b16b8551dfa"}, {file = "rich-10.11.0.tar.gz", hash = "sha256:016fa105f34b69c434e7f908bb5bd7fefa9616efdb218a2917117683a6394ce5"},
] ]
scrapy = [ scrapy = [
{file = "Scrapy-2.5.0-py2.py3-none-any.whl", hash = "sha256:5f590fdc84b496e5a4bb5ef99836b0aa688a07cfcb4bc3bb7290f66486f27424"}, {file = "Scrapy-2.5.0-py2.py3-none-any.whl", hash = "sha256:5f590fdc84b496e5a4bb5ef99836b0aa688a07cfcb4bc3bb7290f66486f27424"},
@ -2139,8 +2139,8 @@ types-redis = [
{file = "types_redis-3.5.8-py3-none-any.whl", hash = "sha256:85814769071721044857c34841e46064b867ccdd58fc81221c43462bd07e4892"}, {file = "types_redis-3.5.8-py3-none-any.whl", hash = "sha256:85814769071721044857c34841e46064b867ccdd58fc81221c43462bd07e4892"},
] ]
types-requests = [ types-requests = [
{file = "types-requests-2.25.8.tar.gz", hash = "sha256:225ac2e86549b6ef3a8a44bf955f80b4955855704a15d2883d8445c8df637242"}, {file = "types-requests-2.25.9.tar.gz", hash = "sha256:4ec8b71da73e5344adb9bee725a74ec8598e7286f9bcb17500d627f259fe4fb9"},
{file = "types_requests-2.25.8-py3-none-any.whl", hash = "sha256:26e90866bcd773d76b316de7e6bd6e24641f9e1653cf27241c533886600f6824"}, {file = "types_requests-2.25.9-py3-none-any.whl", hash = "sha256:543ba8b3b23e38ac028da1d163aecbbc27d3cc8f654ae64339da539a191a2b1c"},
] ]
types-werkzeug = [ types-werkzeug = [
{file = "types-Werkzeug-1.0.5.tar.gz", hash = "sha256:f6216ab0e0211fe73ebdb4ae0e414113d4d8a2f783a15c2d8550e06d0fd8e7f9"}, {file = "types-Werkzeug-1.0.5.tar.gz", hash = "sha256:f6216ab0e0211fe73ebdb4ae0e414113d4d8a2f783a15c2d8550e06d0fd8e7f9"},
@ -2152,8 +2152,8 @@ typing-extensions = [
{file = "typing_extensions-3.10.0.2.tar.gz", hash = "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e"}, {file = "typing_extensions-3.10.0.2.tar.gz", hash = "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e"},
] ]
urllib3 = [ urllib3 = [
{file = "urllib3-1.26.6-py2.py3-none-any.whl", hash = "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4"}, {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"},
{file = "urllib3-1.26.6.tar.gz", hash = "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"}, {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"},
] ]
vt-py = [ vt-py = [
{file = "vt-py-0.7.4.tar.gz", hash = "sha256:27af411495a115f6cc2da5d184a9f32c12a2304eca94efefaae03d2b12b66174"}, {file = "vt-py-0.7.4.tar.gz", hash = "sha256:27af411495a115f6cc2da5d184a9f32c12a2304eca94efefaae03d2b12b66174"},

View File

@ -61,7 +61,7 @@ lief = "^0.11.4"
Flask-Login = "^0.5.0" Flask-Login = "^0.5.0"
flask-restx = "^0.5.1" flask-restx = "^0.5.1"
hiredis = "^2.0.0" hiredis = "^2.0.0"
rich = "^10.10.0" rich = "^10.11.0"
pyphishtanklookup = "^1.0.1" pyphishtanklookup = "^1.0.1"
[tool.poetry.extras] [tool.poetry.extras]
@ -69,9 +69,9 @@ misp = ['python-magic', 'pydeep']
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
mypy = "^0.910" mypy = "^0.910"
ipython = "^7.27.0" ipython = "^7.28.0"
types-redis = "^3.5.8" types-redis = "^3.5.8"
types-requests = "^2.25.8" types-requests = "^2.25.9"
types-Flask = "^1.1.3" types-Flask = "^1.1.3"
types-pkg-resources = "^0.1.2" types-pkg-resources = "^0.1.2"

View File

@ -1,12 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from lookyloo.lookyloo import Lookyloo
lookyloo = Lookyloo()
for capture_uuid in lookyloo.capture_uuids:
try:
ct = lookyloo.get_crawled_tree(capture_uuid)
except Exception:
continue

View File

@ -23,28 +23,9 @@ def main():
indexing = Indexing() indexing = Indexing()
indexing.clear_indexes() indexing.clear_indexes()
for capture_uuid in lookyloo.capture_uuids:
index = True
try:
tree = lookyloo.get_crawled_tree(capture_uuid)
except Exception as e:
print(capture_uuid, e)
continue
if lookyloo.is_public_instance: # This call will rebuild all the caches as needed.
cache = lookyloo.capture_cache(capture_uuid) lookyloo.sorted_capture_cache()
if not cache:
continue
if cache.no_index:
index = False
# NOTE: these methods do nothing if we just generated the pickle when calling lookyloo.get_crawled_tree
if index:
indexing.index_cookies_capture(tree)
indexing.index_body_hashes_capture(tree)
indexing.index_url_capture(tree)
categories = list(lookyloo.categories_capture(capture_uuid).keys())
indexing.index_categories_capture(capture_uuid, categories)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -23,10 +23,7 @@ def uniq_domains(uniq_urls):
return domains return domains
for uuid in lookyloo.capture_uuids: for cache in lookyloo.sorted_capture_cache():
cache = lookyloo.capture_cache(uuid)
if not cache or not hasattr(cache, 'timestamp'):
continue
date = cache.timestamp date = cache.timestamp
if date.year not in stats: if date.year not in stats:
stats[date.year] = {} stats[date.year] = {}