mirror of https://github.com/CIRCL/lookyloo
fix: Rebuild caches when tree doesn't exist
parent
23419a31b9
commit
0c5501016c
|
@ -24,6 +24,15 @@ if __name__ == '__main__':
|
|||
indexing = Indexing()
|
||||
indexing.clear_indexes()
|
||||
for capture_dir in lookyloo.capture_dirs:
|
||||
try:
|
||||
tree = load_pickle_tree(capture_dir)
|
||||
except Exception as e:
|
||||
print(capture_dir, e)
|
||||
if tree:
|
||||
indexing.index_cookies_capture(tree)
|
||||
indexing.index_body_hashes_capture(tree)
|
||||
else:
|
||||
try:
|
||||
lookyloo.cache_tree(capture_dir=capture_dir)
|
||||
except Exception as e:
|
||||
print(capture_dir, e)
|
||||
|
|
|
@ -20,6 +20,7 @@ from urllib.parse import urlsplit
|
|||
from uuid import uuid4
|
||||
from zipfile import ZipFile
|
||||
|
||||
import publicsuffix2 # type: ignore
|
||||
from defang import refang # type: ignore
|
||||
from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
|
||||
from redis import Redis
|
||||
|
@ -209,14 +210,24 @@ class Lookyloo():
|
|||
with self_generated_ua_file.open('w') as f:
|
||||
json.dump(to_store, f, indent=2)
|
||||
|
||||
def cache_tree(self, capture_uuid) -> None:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
def cache_tree(self, capture_uuid: Optional[str]=None, capture_dir: Optional[Union[str, Path]]=None) -> None:
|
||||
c_dir = None
|
||||
if capture_uuid:
|
||||
c_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not c_dir:
|
||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||
elif capture_dir:
|
||||
if isinstance(capture_dir, str):
|
||||
c_dir = Path(capture_dir)
|
||||
else:
|
||||
c_dir = capture_dir
|
||||
|
||||
with open((capture_dir / 'uuid'), 'r') as f:
|
||||
if not c_dir:
|
||||
raise Exception('Need either capture_uuid or capture_dir')
|
||||
|
||||
with open((c_dir / 'uuid'), 'r') as f:
|
||||
uuid = f.read()
|
||||
har_files = sorted(capture_dir.glob('*.har'))
|
||||
har_files = sorted(c_dir.glob('*.har'))
|
||||
try:
|
||||
ct = CrawledTree(har_files, uuid)
|
||||
self.indexing.index_cookies_capture(ct)
|
||||
|
@ -224,7 +235,7 @@ class Lookyloo():
|
|||
except Har2TreeError as e:
|
||||
raise NoValidHarFile(e.message)
|
||||
|
||||
with (capture_dir / 'tree.pickle').open('wb') as _p:
|
||||
with (c_dir / 'tree.pickle').open('wb') as _p:
|
||||
pickle.dump(ct, _p)
|
||||
|
||||
def get_crawled_tree(self, capture_uuid: str) -> CrawledTree:
|
||||
|
|
Loading…
Reference in New Issue