diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index f075414..e94cd17 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -125,13 +125,17 @@ class Lookyloo():
     def load_tree(self, report_dir: Path):
         har_files = sorted(report_dir.glob('*.har'))
         try:
+            meta = {}
+            if (report_dir / 'meta').exists():
+                with open((report_dir / 'meta'), 'r') as f:
+                    meta = json.load(f)
             ct = CrawledTree(har_files)
             ct.find_parents()
             ct.join_trees()
             temp = tempfile.NamedTemporaryFile(prefix='lookyloo', delete=False)
             pickle.dump(ct, temp)
             temp.close()
-            return temp.name, ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url
+            return temp.name, ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url, meta
         except Har2TreeError as e:
             raise NoValidHarFile(e.message)
 
@@ -149,7 +153,8 @@ class Lookyloo():
             return self.sanejs.sha512(sha512)
         return {'response': []}
 
-    def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None):
+    def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None,
+               os: str=None, browser: str=None):
         if not url.startswith('http'):
             url = f'http://{url}'
         items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
@@ -161,8 +166,6 @@ class Lookyloo():
         width = len(str(len(items)))
         dirpath = self.scrape_dir / datetime.now().isoformat()
         dirpath.mkdir()
-        if not listing:  # Write no_index marker
-            (dirpath / 'no_index').touch()
         for i, item in enumerate(items):
             harfile = item['har']
             png = base64.b64decode(item['png'])
@@ -178,5 +181,15 @@ class Lookyloo():
             json.dump(child_frames, f)
         with (dirpath / 'uuid').open('w') as f:
             f.write(perma_uuid)
+        if not listing:  # Write no_index marker
+            (dirpath / 'no_index').touch()
+        if os or browser:
+            meta = {}
+            if os:
+                meta['os'] = os
+            if browser:
+                meta['browser'] = browser
+            with (dirpath / 'meta').open('w') as f:
+                json.dump(meta, f)
         self._set_report_cache(dirpath)
         return perma_uuid
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 0227bc5..e210a52 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -37,9 +37,9 @@ lookyloo = Lookyloo()
 # keep
 def load_tree(report_dir):
     session.clear()
-    temp_file_name, tree_json, tree_time, tree_ua, tree_root_url = lookyloo.load_tree(report_dir)
+    temp_file_name, tree_json, tree_time, tree_ua, tree_root_url, meta = lookyloo.load_tree(report_dir)
     session["tree"] = temp_file_name
-    return tree_json, tree_time, tree_ua, tree_root_url
+    return tree_json, tree_time, tree_ua, tree_root_url, meta
 
 
 @app.route('/submit', methods=['POST', 'GET'])
@@ -53,7 +53,8 @@ def submit():
 def scrape_web():
     if request.form.get('url'):
         perma_uuid = lookyloo.scrape(url=request.form.get('url'), depth=request.form.get('depth'),
-                                     listing=request.form.get('listing'), user_agent=request.form.get('user_agent'))
+                                     listing=request.form.get('listing'), user_agent=request.form.get('user_agent'),
+                                     os=request.form.get('os'), browser=request.form.get('browser'))
         return redirect(url_for('tree', tree_uuid=perma_uuid))
     user_agents = get_user_agents()
     user_agents.pop('by_frequency')
@@ -132,9 +133,10 @@ def tree(tree_uuid):
         return redirect(url_for('index'))
 
     try:
-        tree_json, start_time, user_agent, root_url = load_tree(report_dir)
+        tree_json, start_time, user_agent, root_url, meta = load_tree(report_dir)
         return render_template('tree.html', tree_json=tree_json, start_time=start_time,
-                               user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid)
+                               user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
+                               meta=meta)
     except NoValidHarFile as e:
         return render_template('error.html', error_message=e)
 
diff --git a/website/web/templates/tree.html b/website/web/templates/tree.html
index 0a72a6e..10848ba 100644
--- a/website/web/templates/tree.html
+++ b/website/web/templates/tree.html
@@ -62,6 +62,11 @@
         Root URL: {{ root_url }}</br>
         Start time: {{ start_time }}</br>
         User Agent: {{ user_agent }}</br>
+        {% if meta %}
+        {%for k, v in meta.items()%}
+        {{k.title()}}: {{ v }}</br>
+        {%endfor%}
+        {%endif%}
         Download Image</br>