diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 808d86a8..21b5c5b1 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -36,6 +36,8 @@ class Lookyloo(): if not self.scrape_dir.exists(): self.scrape_dir.mkdir(parents=True, exist_ok=True) + self._init_existing_dumps() + # Try to reach sanejs self.sanejs = SaneJS() if not self.sanejs.is_up: @@ -45,6 +47,31 @@ class Lookyloo(): self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(loglevel) + def _set_report_cache(self, report_dir: str): + har_files = sorted(report_dir.glob('*.har')) + if not har_files: + self.logger.warning(f'No har files in {report_dir}') + return + with (report_dir / 'uuid').open() as f: + uuid = f.read().strip() + with har_files[0].open() as f: + j = json.load(f) + title = j['log']['pages'][0]['title'] + if not title: + title = '!! No title found !! ' + cache = {'uuid': uuid, 'title': title} + self.redis.hmset(str(report_dir), cache) + self.redis.hset('lookup_dirs', uuid, str(report_dir)) + + def report_cache(self, report_dir) -> dict: + if isinstance(report_dir, Path): + report_dir = str(report_dir) + return self.redis.hgetall(report_dir) + + def _init_existing_dumps(self): + for report_dir in self.report_dirs: + self._set_report_cache(report_dir) + @property def report_dirs(self): for report_dir in self.scrape_dir.iterdir(): @@ -57,14 +84,11 @@ class Lookyloo(): f.write(str(uuid4())) return sorted(self.scrape_dir.iterdir(), reverse=True) - @property - def lookup_dirs(self): - # Build lookup table trees - lookup_dirs = {} - for report_dir in self.report_dirs: - with (report_dir / 'uuid').open() as f: - lookup_dirs[f.read().strip()] = report_dir - return lookup_dirs + def lookup_report_dir(self, uuid) -> Path: + report_dir = self.redis.hget('lookup_dirs', uuid) + if report_dir: + return Path(report_dir) + return None def enqueue_scrape(self, query: dict): perma_uuid = str(uuid4()) @@ -134,4 +158,5 @@ class Lookyloo(): json.dump(child_frames, f) with (dirpath / 'uuid').open('w') as f: f.write(perma_uuid) + self._set_report_cache(dirpath) return perma_uuid diff --git a/website/web/__init__.py b/website/web/__init__.py index 389db7f8..7f67946d 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -113,7 +113,7 @@ def urlnode_details(node_uuid): @app.route('/tree//image', methods=['GET']) def image(tree_uuid): - report_dir = lookyloo.lookup_dirs.get(tree_uuid) + report_dir = lookyloo.lookup_report_dir(tree_uuid) if not report_dir: return Response('Not available.', mimetype='text/text') to_return = lookyloo.load_image(report_dir) @@ -123,7 +123,7 @@ def image(tree_uuid): @app.route('/tree/', methods=['GET']) def tree(tree_uuid): - report_dir = lookyloo.lookup_dirs.get(tree_uuid) + report_dir = lookyloo.lookup_report_dir(tree_uuid) if not report_dir: return redirect(url_for('index')) @@ -141,14 +141,9 @@ def index(): session.clear() titles = [] for report_dir in lookyloo.report_dirs: - har_files = sorted(report_dir.glob('*.har')) - if not har_files: + cached = lookyloo.report_cache(report_dir) + if not cached: continue - with har_files[0].open() as f: - j = json.load(f) - title = j['log']['pages'][0]['title'] - with (report_dir / 'uuid').open() as f: - uuid = f.read().strip() - titles.append((uuid, title)) + titles.append((cached['uuid'], cached['title'])) return render_template('index.html', titles=titles)