mirror of https://github.com/CIRCL/lookyloo
chg: Expose the picklized tree in lookyloo
parent
9873d3ebc3
commit
6871912e64
|
@ -314,25 +314,27 @@ class Lookyloo():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.exception(e)
|
logging.exception(e)
|
||||||
|
|
||||||
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
|
def get_crawled_tree(self, capture_dir: Path) -> CrawledTree:
|
||||||
har_files = sorted(capture_dir.glob('*.har'))
|
|
||||||
pickle_file = capture_dir / 'tree.pickle'
|
pickle_file = capture_dir / 'tree.pickle'
|
||||||
try:
|
ct = self._load_pickle(pickle_file)
|
||||||
meta = {}
|
if not ct:
|
||||||
if (capture_dir / 'meta').exists():
|
with open((capture_dir / 'uuid'), 'r') as f:
|
||||||
# NOTE: Legacy, the meta file should be present
|
uuid = f.read()
|
||||||
with open((capture_dir / 'meta'), 'r') as f:
|
har_files = sorted(capture_dir.glob('*.har'))
|
||||||
meta = json.load(f)
|
try:
|
||||||
ct = self._load_pickle(pickle_file)
|
|
||||||
if not ct:
|
|
||||||
with open((capture_dir / 'uuid'), 'r') as f:
|
|
||||||
uuid = f.read()
|
|
||||||
ct = CrawledTree(har_files, uuid)
|
ct = CrawledTree(har_files, uuid)
|
||||||
with pickle_file.open('wb') as _p:
|
except Har2TreeError as e:
|
||||||
pickle.dump(ct, _p)
|
raise NoValidHarFile(e.message)
|
||||||
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
|
with pickle_file.open('wb') as _p:
|
||||||
except Har2TreeError as e:
|
pickle.dump(ct, _p)
|
||||||
raise NoValidHarFile(e.message)
|
return ct
|
||||||
|
|
||||||
|
def load_tree(self, capture_dir: Path) -> Tuple[str, str, str, str, Dict[str, str]]:
|
||||||
|
meta = {}
|
||||||
|
with open((capture_dir / 'meta'), 'r') as f:
|
||||||
|
meta = json.load(f)
|
||||||
|
ct = self.get_crawled_tree(capture_dir)
|
||||||
|
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
|
||||||
|
|
||||||
def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
|
def _get_raw(self, capture_dir: Path, extension: str='*', all_files: bool=True) -> BytesIO:
|
||||||
all_paths = sorted(list(capture_dir.glob(f'*.{extension}')))
|
all_paths = sorted(list(capture_dir.glob(f'*.{extension}')))
|
||||||
|
|
Loading…
Reference in New Issue