From f029b5d32458416888a064f037ec970b9a74c108 Mon Sep 17 00:00:00 2001 From: thanat0s Date: Sun, 17 Feb 2019 17:43:25 +0000 Subject: [PATCH 1/3] Add noindex button --- lookyloo/lookyloo.py | 9 +++++++-- website/web/__init__.py | 8 ++++++-- website/web/templates/scrape.html | 2 ++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 21b5c5b1..9a0f2700 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -48,6 +48,9 @@ class Lookyloo(): self.logger.setLevel(loglevel) def _set_report_cache(self, report_dir: str): + noindex = 'False' + if (report_dir / 'noindex').exists(): # If the folders claims anonymity + noindex = 'True' # HMSet strings not boolean. har_files = sorted(report_dir.glob('*.har')) if not har_files: self.logger.warning(f'No har files in {report_dir}') @@ -59,7 +62,7 @@ class Lookyloo(): title = j['log']['pages'][0]['title'] if not title: title = '!! No title found !! ' - cache = {'uuid': uuid, 'title': title} + cache = {'uuid': uuid, 'title': title, 'noindex': noindex} self.redis.hmset(str(report_dir), cache) self.redis.hset('lookup_dirs', uuid, str(report_dir)) @@ -131,7 +134,7 @@ class Lookyloo(): return self.sanejs.sha512(sha512) return {'response': []} - def scrape(self, url, depth: int=1, user_agent: str=None, perma_uuid: str=None): + def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None): if not url.startswith('http'): url = f'http://{url}' items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO') @@ -143,6 +146,8 @@ class Lookyloo(): width = len(str(len(items))) dirpath = self.scrape_dir / datetime.now().isoformat() dirpath.mkdir() + if not listing: # Write noindex marker + (dirpath / 'noindex').open('w') for i, item in enumerate(items): harfile = item['har'] png = base64.b64decode(item['png']) diff --git a/website/web/__init__.py b/website/web/__init__.py index 7f67946d..170da925 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -51,7 +51,10 @@ def submit(): @app.route('/scrape', methods=['GET', 'POST']) def scrape_web(): if request.form.get('url'): - perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth')) + listing = False + if request.form.get('listing') == "True": + listing = True + perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), listing) return redirect(url_for('tree', tree_uuid=perma_uuid)) return render_template('scrape.html') @@ -144,6 +147,7 @@ def index(): cached = lookyloo.report_cache(report_dir) if not cached: continue - titles.append((cached['uuid'], cached['title'])) + if cached['noindex'] == 'False': # Hide noindex requests + titles.append((cached['uuid'], cached['title'])) return render_template('index.html', titles=titles) diff --git a/website/web/templates/scrape.html b/website/web/templates/scrape.html index 6b3cbef8..ea8c6ed1 100644 --- a/website/web/templates/scrape.html +++ b/website/web/templates/scrape.html @@ -21,6 +21,8 @@ + + From 8afcc30840c0f1d09a06ee7a4351d1041b2fbeb9 Mon Sep 17 00:00:00 2001 From: Th4nat0s Date: Sun, 17 Feb 2019 21:45:23 +0000 Subject: [PATCH 2/3] clarifie index enable/disable --- lookyloo/lookyloo.py | 12 ++++++------ website/web/__init__.py | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 9a0f2700..4b310887 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -48,9 +48,9 @@ class Lookyloo(): self.logger.setLevel(loglevel) def _set_report_cache(self, report_dir: str): - noindex = 'False' - if (report_dir / 'noindex').exists(): # If the folders claims anonymity - noindex = 'True' # HMSet strings not boolean. + no_index = 'False' + if (report_dir / 'no_index').exists(): # If the folders claims anonymity + no_index = 'True' # HMSet strings not boolean. har_files = sorted(report_dir.glob('*.har')) if not har_files: self.logger.warning(f'No har files in {report_dir}') @@ -62,7 +62,7 @@ class Lookyloo(): title = j['log']['pages'][0]['title'] if not title: title = '!! No title found !! ' - cache = {'uuid': uuid, 'title': title, 'noindex': noindex} + cache = {'uuid': uuid, 'title': title, 'no_index': no_index} self.redis.hmset(str(report_dir), cache) self.redis.hset('lookup_dirs', uuid, str(report_dir)) @@ -146,8 +146,8 @@ class Lookyloo(): width = len(str(len(items))) dirpath = self.scrape_dir / datetime.now().isoformat() dirpath.mkdir() - if not listing: # Write noindex marker - (dirpath / 'noindex').open('w') + if not listing: # Write no_index marker + (dirpath / 'no_index').open('w') for i, item in enumerate(items): harfile = item['har'] png = base64.b64decode(item['png']) diff --git a/website/web/__init__.py b/website/web/__init__.py index 170da925..ae88244b 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -147,7 +147,6 @@ def index(): cached = lookyloo.report_cache(report_dir) if not cached: continue - if cached['noindex'] == 'False': # Hide noindex requests + if cached['no_index'] == 'False': # Hide no_index requests titles.append((cached['uuid'], cached['title'])) - return render_template('index.html', titles=titles) From fdcc566e05bbba518d83e4a120b721ef3d8f01a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Mon, 18 Feb 2019 14:29:15 +0100 Subject: [PATCH 3/3] chg: Slight cleanup --- lookyloo/lookyloo.py | 11 +++++------ website/web/__init__.py | 10 +++------- website/web/templates/scrape.html | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index adca5dcb..fa1c5f2e 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -49,9 +49,6 @@ class Lookyloo(): self.logger.setLevel(loglevel) def _set_report_cache(self, report_dir: str): - no_index = 'False' - if (report_dir / 'no_index').exists(): # If the folders claims anonymity - no_index = 'True' # HMSet strings not boolean. har_files = sorted(report_dir.glob('*.har')) if not har_files: self.logger.warning(f'No har files in {report_dir}') @@ -63,7 +60,9 @@ class Lookyloo(): title = j['log']['pages'][0]['title'] if not title: title = '!! No title found !! ' - cache = {'uuid': uuid, 'title': title, 'no_index': no_index} + cache = {'uuid': uuid, 'title': title} + if (report_dir / 'no_index').exists(): # If the folders claims anonymity + cache['no_index'] = 1 self.redis.hmset(str(report_dir), cache) self.redis.hset('lookup_dirs', uuid, str(report_dir)) @@ -138,7 +137,7 @@ class Lookyloo(): return self.sanejs.sha512(sha512) return {'response': []} - def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None): + def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None): if not url.startswith('http'): url = f'http://{url}' items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO') @@ -151,7 +150,7 @@ class Lookyloo(): dirpath = self.scrape_dir / datetime.now().isoformat() dirpath.mkdir() if not listing: # Write no_index marker - (dirpath / 'no_index').open('w') + (dirpath / 'no_index').touch() for i, item in enumerate(items): harfile = item['har'] png = base64.b64decode(item['png']) diff --git a/website/web/__init__.py b/website/web/__init__.py index 9e266b95..cedc33f2 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -52,10 +52,7 @@ def submit(): @app.route('/scrape', methods=['GET', 'POST']) def scrape_web(): if request.form.get('url'): - listing = False - if request.form.get('listing') == "True": - listing = True - perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), listing) + perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), request.form.get('listing')) return redirect(url_for('tree', tree_uuid=perma_uuid)) return render_template('scrape.html') @@ -149,8 +146,7 @@ def index(): titles = [] for report_dir in lookyloo.report_dirs: cached = lookyloo.report_cache(report_dir) - if not cached: + if not cached or 'no_index' in cached: continue - if cached['no_index'] == 'False': # Hide no_index requests - titles.append((cached['uuid'], cached['title'])) + titles.append((cached['uuid'], cached['title'])) return render_template('index.html', titles=titles) diff --git a/website/web/templates/scrape.html b/website/web/templates/scrape.html index ea8c6ed1..3a9551fd 100644 --- a/website/web/templates/scrape.html +++ b/website/web/templates/scrape.html @@ -22,7 +22,7 @@ - +