diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 21b5c5b1..9a0f2700 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -48,6 +48,9 @@ class Lookyloo():
         self.logger.setLevel(loglevel)
 
     def _set_report_cache(self, report_dir: str):
+        noindex = 'False'
+        if (report_dir / 'noindex').exists():  # If the folder claims anonymity
+            noindex = 'True'  # HMSET stores strings, not booleans.
         har_files = sorted(report_dir.glob('*.har'))
         if not har_files:
             self.logger.warning(f'No har files in {report_dir}')
@@ -59,7 +62,7 @@ class Lookyloo():
         title = j['log']['pages'][0]['title']
         if not title:
             title = '!! No title found !! '
-        cache = {'uuid': uuid, 'title': title}
+        cache = {'uuid': uuid, 'title': title, 'noindex': noindex}
         self.redis.hmset(str(report_dir), cache)
         self.redis.hset('lookup_dirs', uuid, str(report_dir))
 
@@ -131,7 +134,7 @@ class Lookyloo():
             return self.sanejs.sha512(sha512)
         return {'response': []}
 
-    def scrape(self, url, depth: int=1, user_agent: str=None, perma_uuid: str=None):
+    def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None):
         if not url.startswith('http'):
             url = f'http://{url}'
         items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
@@ -143,6 +146,8 @@ class Lookyloo():
         width = len(str(len(items)))
         dirpath = self.scrape_dir / datetime.now().isoformat()
         dirpath.mkdir()
+        if not listing:  # Drop a 'noindex' marker so the capture stays off the index
+            (dirpath / 'noindex').touch()
         for i, item in enumerate(items):
             harfile = item['har']
             png = base64.b64decode(item['png'])
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 7f67946d..170da925 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -51,7 +51,8 @@ def submit():
 @app.route('/scrape', methods=['GET', 'POST'])
 def scrape_web():
     if request.form.get('url'):
-        perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'))
+        listing = request.form.get('listing') == 'True'
+        perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), listing)
         return redirect(url_for('tree', tree_uuid=perma_uuid))
     return render_template('scrape.html')
 
@@ -144,6 +145,7 @@ def index():
         cached = lookyloo.report_cache(report_dir)
         if not cached:
             continue
-        titles.append((cached['uuid'], cached['title']))
+        if cached.get('noindex', 'False') == 'False':  # Hide noindex captures; old cache entries lack the key
+            titles.append((cached['uuid'], cached['title']))
     return render_template('index.html', titles=titles)
 
diff --git a/website/web/templates/scrape.html b/website/web/templates/scrape.html
index 6b3cbef8..ea8c6ed1 100644
--- a/website/web/templates/scrape.html
+++ b/website/web/templates/scrape.html
@@ -21,6 +21,8 @@
       <input type="text" class="form-control" name="url" id="url" placeholder="URL to scrape">
       <select name="depth" id="depth">
         <option value="1">1</option>
       </select>
+      <input type="checkbox" name="listing" value="True" checked="true"/>
+      <label for="listing">Display the capture on the public index</label>
     </div>
     <button type="submit" class="btn btn-default">Scrape</button>