diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index adca5dcb..fa1c5f2e 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -49,9 +49,6 @@ class Lookyloo(): self.logger.setLevel(loglevel) def _set_report_cache(self, report_dir: str): - no_index = 'False' - if (report_dir / 'no_index').exists(): # If the folders claims anonymity - no_index = 'True' # HMSet strings not boolean. har_files = sorted(report_dir.glob('*.har')) if not har_files: self.logger.warning(f'No har files in {report_dir}') @@ -63,7 +60,9 @@ class Lookyloo(): title = j['log']['pages'][0]['title'] if not title: title = '!! No title found !! ' - cache = {'uuid': uuid, 'title': title, 'no_index': no_index} + cache = {'uuid': uuid, 'title': title} + if (report_dir / 'no_index').exists(): # If the folders claims anonymity + cache['no_index'] = 1 self.redis.hmset(str(report_dir), cache) self.redis.hset('lookup_dirs', uuid, str(report_dir)) @@ -138,7 +137,7 @@ class Lookyloo(): return self.sanejs.sha512(sha512) return {'response': []} - def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None): + def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None): if not url.startswith('http'): url = f'http://{url}' items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO') @@ -151,7 +150,7 @@ class Lookyloo(): dirpath = self.scrape_dir / datetime.now().isoformat() dirpath.mkdir() if not listing: # Write no_index marker - (dirpath / 'no_index').open('w') + (dirpath / 'no_index').touch() for i, item in enumerate(items): harfile = item['har'] png = base64.b64decode(item['png']) diff --git a/website/web/__init__.py b/website/web/__init__.py index 9e266b95..cedc33f2 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -52,10 +52,7 @@ def submit(): @app.route('/scrape', methods=['GET', 'POST']) def scrape_web(): if request.form.get('url'): - listing = False 
- if request.form.get('listing') == "True": - listing = True - perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), listing) + perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), request.form.get('listing') == 'True') return redirect(url_for('tree', tree_uuid=perma_uuid)) return render_template('scrape.html') @@ -149,8 +146,7 @@ def index(): titles = [] for report_dir in lookyloo.report_dirs: cached = lookyloo.report_cache(report_dir) - if not cached: + if not cached or 'no_index' in cached: continue - if cached['no_index'] == 'False': # Hide no_index requests - titles.append((cached['uuid'], cached['title'])) + titles.append((cached['uuid'], cached['title'])) return render_template('index.html', titles=titles) diff --git a/website/web/templates/scrape.html b/website/web/templates/scrape.html index ea8c6ed1..3a9551fd 100644 --- a/website/web/templates/scrape.html +++ b/website/web/templates/scrape.html @@ -22,7 +22,7 @@ - +