diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index bb77f9dc..adca5dcb 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -49,6 +49,9 @@ class Lookyloo(): self.logger.setLevel(loglevel) def _set_report_cache(self, report_dir: str): + no_index = 'False' + if (report_dir / 'no_index').exists(): # If the folders claims anonymity + no_index = 'True' # HMSet strings not boolean. har_files = sorted(report_dir.glob('*.har')) if not har_files: self.logger.warning(f'No har files in {report_dir}') @@ -60,7 +63,7 @@ class Lookyloo(): title = j['log']['pages'][0]['title'] if not title: title = '!! No title found !! ' - cache = {'uuid': uuid, 'title': title} + cache = {'uuid': uuid, 'title': title, 'no_index': no_index} self.redis.hmset(str(report_dir), cache) self.redis.hset('lookup_dirs', uuid, str(report_dir)) @@ -135,7 +138,7 @@ class Lookyloo(): return self.sanejs.sha512(sha512) return {'response': []} - def scrape(self, url, depth: int=1, user_agent: str=None, perma_uuid: str=None): + def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None): if not url.startswith('http'): url = f'http://{url}' items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO') @@ -147,6 +150,8 @@ class Lookyloo(): width = len(str(len(items))) dirpath = self.scrape_dir / datetime.now().isoformat() dirpath.mkdir() + if not listing: # Write no_index marker + (dirpath / 'no_index').open('w') for i, item in enumerate(items): harfile = item['har'] png = base64.b64decode(item['png']) diff --git a/website/web/__init__.py b/website/web/__init__.py index 730da195..9e266b95 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -52,7 +52,10 @@ def submit(): @app.route('/scrape', methods=['GET', 'POST']) def scrape_web(): if request.form.get('url'): - perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth')) + listing = False + if request.form.get('listing') == "True": + listing = True + perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), listing) return redirect(url_for('tree', tree_uuid=perma_uuid)) return render_template('scrape.html') @@ -148,6 +151,6 @@ def index(): cached = lookyloo.report_cache(report_dir) if not cached: continue - titles.append((cached['uuid'], cached['title'])) - + if cached['no_index'] == 'False': # Hide no_index requests + titles.append((cached['uuid'], cached['title'])) return render_template('index.html', titles=titles) diff --git a/website/web/templates/scrape.html b/website/web/templates/scrape.html index 6b3cbef8..ea8c6ed1 100644 --- a/website/web/templates/scrape.html +++ b/website/web/templates/scrape.html @@ -21,6 +21,8 @@ + +