mirror of https://github.com/CIRCL/lookyloo
commit 44e9c35ac7
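In short, the hunks below add a 'listing' option to the scraper: a capture started with listing disabled gets an empty no_index marker file, which is carried into the Redis cache entry and then used to hide that capture from the public index.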
@@ -61,6 +61,8 @@ class Lookyloo():
         if not title:
             title = '!! No title found !! '
         cache = {'uuid': uuid, 'title': title}
+        if (report_dir / 'no_index').exists():  # If the folder claims anonymity
+            cache['no_index'] = 1
         self.redis.hmset(str(report_dir), cache)
         self.redis.hset('lookup_dirs', uuid, str(report_dir))
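For context, a minimal sketch (not part of the commit) of how a cache entry written above could be read back with redis-py; the helper name report_cache matches its later use in index(), but the connection setup here is an assumption:

    import redis

    # Assumed connection settings; the real instance is configured elsewhere.
    r = redis.Redis(decode_responses=True)

    def report_cache(report_dir):
        # hgetall returns the hash written by hmset above, so the dict
        # contains 'no_index' only for captures that requested anonymity.
        return r.hgetall(str(report_dir))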
@@ -135,7 +137,7 @@ class Lookyloo():
             return self.sanejs.sha512(sha512)
         return {'response': []}

-    def scrape(self, url, depth: int=1, user_agent: str=None, perma_uuid: str=None):
+    def scrape(self, url, depth: int=1, listing: bool=True, user_agent: str=None, perma_uuid: str=None):
         if not url.startswith('http'):
             url = f'http://{url}'
         items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
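A hypothetical usage sketch of the new signature (constructor arguments elided; assume a configured instance):

    lookyloo = Lookyloo()
    # listing=False requests a capture that stays off the public index.
    uuid = lookyloo.scrape('example.com', depth=1, listing=False)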
@@ -147,6 +149,8 @@ class Lookyloo():
         width = len(str(len(items)))
         dirpath = self.scrape_dir / datetime.now().isoformat()
         dirpath.mkdir()
+        if not listing:  # Write no_index marker
+            (dirpath / 'no_index').touch()
         for i, item in enumerate(items):
             harfile = item['har']
             png = base64.b64decode(item['png'])
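The marker is simply an empty file; a self-contained sketch of the mechanism, with a temporary directory standing in for dirpath:

    from pathlib import Path
    from tempfile import TemporaryDirectory

    with TemporaryDirectory() as tmp:
        dirpath = Path(tmp)
        (dirpath / 'no_index').touch()           # written at scrape time
        assert (dirpath / 'no_index').exists()   # checked when caching the report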
@@ -52,7 +52,7 @@ def submit():
 @app.route('/scrape', methods=['GET', 'POST'])
 def scrape_web():
     if request.form.get('url'):
-        perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'))
+        perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'), request.form.get('listing'))
         return redirect(url_for('tree', tree_uuid=perma_uuid))
     return render_template('scrape.html')
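One subtlety worth noting: a checked checkbox posts the string 'on' and an unchecked one is absent from the form entirely, so request.form.get('listing') is truthy exactly when the box is ticked; depth likewise arrives as a string. A sketch (an assumption, not the commit's code) of a stricter version of the same call:

    listing = bool(request.form.get('listing'))  # 'on' -> True, None -> False
    depth = int(request.form.get('depth') or 1)  # form values arrive as strings
    perma_uuid = lookyloo.scrape(request.form.get('url'), depth, listing)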
@@ -146,8 +146,7 @@ def index():
     titles = []
     for report_dir in lookyloo.report_dirs:
         cached = lookyloo.report_cache(report_dir)
-        if not cached:
+        if not cached or 'no_index' in cached:
             continue
         titles.append((cached['uuid'], cached['title']))
-
     return render_template('index.html', titles=titles)
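Equivalently, as a comprehension (a sketch, not from the commit):

    caches = (lookyloo.report_cache(d) for d in lookyloo.report_dirs)
    titles = [(c['uuid'], c['title']) for c in caches if c and 'no_index' not in c]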
@@ -21,6 +21,8 @@
           <option value="8">8</option>
           <option value="9">9</option>
         </select>
+        <label for="listing">Public</label>
+        <input type="checkbox" name="listing" checked="true"></input>
       </div>
       <button type="submit" class="btn btn-default">Scrape</button>
     </form>
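A small markup nit, offered as a suggestion rather than as part of the commit: checked is a boolean attribute, so checked="true" is nonstandard (plain checked suffices), and input is a void element that takes no closing </input> tag. Browsers tolerate both, so the form works as written.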