mirror of https://github.com/CIRCL/lookyloo
fix: Rebuild caches when tree doesn't exist
parent
23419a31b9
commit
0c5501016c
|
@ -24,6 +24,15 @@ if __name__ == '__main__':
|
||||||
indexing = Indexing()
|
indexing = Indexing()
|
||||||
indexing.clear_indexes()
|
indexing.clear_indexes()
|
||||||
for capture_dir in lookyloo.capture_dirs:
|
for capture_dir in lookyloo.capture_dirs:
|
||||||
tree = load_pickle_tree(capture_dir)
|
try:
|
||||||
indexing.index_cookies_capture(tree)
|
tree = load_pickle_tree(capture_dir)
|
||||||
indexing.index_body_hashes_capture(tree)
|
except Exception as e:
|
||||||
|
print(capture_dir, e)
|
||||||
|
if tree:
|
||||||
|
indexing.index_cookies_capture(tree)
|
||||||
|
indexing.index_body_hashes_capture(tree)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
lookyloo.cache_tree(capture_dir=capture_dir)
|
||||||
|
except Exception as e:
|
||||||
|
print(capture_dir, e)
|
||||||
|
|
|
@ -20,6 +20,7 @@ from urllib.parse import urlsplit
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
import publicsuffix2 # type: ignore
|
||||||
from defang import refang # type: ignore
|
from defang import refang # type: ignore
|
||||||
from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
|
from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode
|
||||||
from redis import Redis
|
from redis import Redis
|
||||||
|
@ -209,14 +210,24 @@ class Lookyloo():
|
||||||
with self_generated_ua_file.open('w') as f:
|
with self_generated_ua_file.open('w') as f:
|
||||||
json.dump(to_store, f, indent=2)
|
json.dump(to_store, f, indent=2)
|
||||||
|
|
||||||
def cache_tree(self, capture_uuid) -> None:
|
def cache_tree(self, capture_uuid: Optional[str]=None, capture_dir: Optional[Union[str, Path]]=None) -> None:
|
||||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
c_dir = None
|
||||||
if not capture_dir:
|
if capture_uuid:
|
||||||
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
c_dir = self.lookup_capture_dir(capture_uuid)
|
||||||
|
if not c_dir:
|
||||||
|
raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
|
||||||
|
elif capture_dir:
|
||||||
|
if isinstance(capture_dir, str):
|
||||||
|
c_dir = Path(capture_dir)
|
||||||
|
else:
|
||||||
|
c_dir = capture_dir
|
||||||
|
|
||||||
with open((capture_dir / 'uuid'), 'r') as f:
|
if not c_dir:
|
||||||
|
raise Exception('Need either capture_uuid or capture_dir')
|
||||||
|
|
||||||
|
with open((c_dir / 'uuid'), 'r') as f:
|
||||||
uuid = f.read()
|
uuid = f.read()
|
||||||
har_files = sorted(capture_dir.glob('*.har'))
|
har_files = sorted(c_dir.glob('*.har'))
|
||||||
try:
|
try:
|
||||||
ct = CrawledTree(har_files, uuid)
|
ct = CrawledTree(har_files, uuid)
|
||||||
self.indexing.index_cookies_capture(ct)
|
self.indexing.index_cookies_capture(ct)
|
||||||
|
@ -224,7 +235,7 @@ class Lookyloo():
|
||||||
except Har2TreeError as e:
|
except Har2TreeError as e:
|
||||||
raise NoValidHarFile(e.message)
|
raise NoValidHarFile(e.message)
|
||||||
|
|
||||||
with (capture_dir / 'tree.pickle').open('wb') as _p:
|
with (c_dir / 'tree.pickle').open('wb') as _p:
|
||||||
pickle.dump(ct, _p)
|
pickle.dump(ct, _p)
|
||||||
|
|
||||||
def get_crawled_tree(self, capture_uuid: str) -> CrawledTree:
|
def get_crawled_tree(self, capture_uuid: str) -> CrawledTree:
|
||||||
|
|
Loading…
Reference in New Issue