2019-01-30 14:30:01 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import json
|
|
|
|
import pickle
|
|
|
|
from zipfile import ZipFile, ZIP_DEFLATED
|
|
|
|
from io import BytesIO
|
|
|
|
import os
|
2019-07-05 16:27:23 +02:00
|
|
|
import logging
|
2020-01-06 15:32:38 +01:00
|
|
|
from pathlib import Path
|
2019-01-30 14:30:01 +01:00
|
|
|
|
|
|
|
from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response
|
2020-01-06 15:32:38 +01:00
|
|
|
from flask_bootstrap import Bootstrap # type: ignore
|
2019-01-30 14:30:01 +01:00
|
|
|
|
2019-03-29 20:11:44 +01:00
|
|
|
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
|
2019-01-30 14:30:01 +01:00
|
|
|
from lookyloo.lookyloo import Lookyloo
|
2019-02-18 13:52:48 +01:00
|
|
|
from lookyloo.exceptions import NoValidHarFile
|
2019-01-30 14:30:01 +01:00
|
|
|
|
2020-01-06 15:32:38 +01:00
|
|
|
from typing import Tuple
|
2019-01-30 14:30:01 +01:00
|
|
|
|
2020-01-06 15:32:38 +01:00
|
|
|
# Flask application object serving the web interface.
app: Flask = Flask(__name__)

# The session-signing key lives in a file under the home directory returned by
# get_homedir(). A missing file, or one shorter than 64 bytes, is (re)generated
# from os.urandom before being loaded.
secret_file_path: Path = get_homedir() / 'secret_key'

if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
    secret_file_path.write_bytes(os.urandom(64))

app.config['SECRET_KEY'] = secret_file_path.read_bytes()

Bootstrap(app)
app.config.update(
    BOOTSTRAP_SERVE_LOCAL=True,
    SESSION_COOKIE_NAME='lookyloo',
)
app.debug = False

# API entry point for splash: the SPLASH_URL environment variable wins when it
# is set to a non-empty value, otherwise the local default is used.
splash_url: str = os.environ.get('SPLASH_URL') or 'http://127.0.0.1:8050'
# Splash log level
loglevel = logging.DEBUG
# Set it to True if your instance is publicly available so users aren't able to scan your internal network
only_global_lookups: bool = False

lookyloo: Lookyloo = Lookyloo(splash_url=splash_url, loglevel=loglevel, only_global_lookups=only_global_lookups)
|
2019-01-30 14:30:01 +01:00
|
|
|
|
|
|
|
|
|
|
|
# keep
|
2020-01-06 15:32:38 +01:00
|
|
|
def load_tree(report_dir: Path) -> Tuple[dict, str, str, str, dict]:
    """Load the tree of *report_dir* and bind it to the current session.

    The session is emptied first. The first element returned by
    ``lookyloo.load_tree`` is stored under ``session["tree"]`` (the route
    handlers later open that path and unpickle it); the remaining five
    elements are returned to the caller in the same order.
    """
    session.clear()
    pickled_tree_path, json_tree, started_at, ua, root, tree_meta = lookyloo.load_tree(report_dir)
    session["tree"] = pickled_tree_path
    return json_tree, started_at, ua, root, tree_meta
|
2019-01-30 14:30:01 +01:00
|
|
|
|
|
|
|
|
|
|
|
@app.route('/submit', methods=['POST', 'GET'])
def submit():
    """Enqueue a scrape described by the JSON request body.

    The body is parsed as JSON regardless of the request's content type and
    handed to ``lookyloo.enqueue_scrape``; whatever that call returns is sent
    back as the response body.
    """
    query = request.get_json(force=True)
    return Response(lookyloo.enqueue_scrape(query), mimetype='text/text')
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/scrape', methods=['GET', 'POST'])
def scrape_web():
    """Show the scrape form, or run a scrape when a URL was submitted.

    Without a ``url`` form field the scrape form is rendered with the known
    user agents (minus the ``by_frequency`` entry). With one, the scrape is
    triggered and the client is redirected to the resulting tree page.
    """
    target_url = request.form.get('url')
    if not target_url:
        user_agents = get_user_agents()
        user_agents.pop('by_frequency')
        return render_template('scrape.html', user_agents=user_agents)

    # Only forward the cookie upload when the file part is present and a file
    # was actually selected (non-empty filename).
    has_cookie_upload = 'cookies' in request.files and request.files['cookies'].filename
    cookie_file = request.files['cookies'].stream if has_cookie_upload else None

    # Remaining options are forwarded as submitted (None when absent).
    optional_fields = ('depth', 'listing', 'user_agent', 'os', 'browser')
    options = {field: request.form.get(field) for field in optional_fields}

    perma_uuid = lookyloo.scrape(url=target_url, cookies_pseudofile=cookie_file, **options)
    return redirect(url_for('tree', tree_uuid=perma_uuid))
|
2019-01-30 14:30:01 +01:00
|
|
|
|
|
|
|
|
|
|
|
@app.route('/tree/hostname/<node_uuid>/text', methods=['GET'])
def hostnode_details_text(node_uuid):
    """Send the names of a host node's URLs as a downloadable markdown file.

    The tree is unpickled from the path stored in ``session["tree"]`` and the
    host node is looked up by *node_uuid*.
    """
    with open(session["tree"], 'rb') as pickled_tree:
        crawled_tree = pickle.load(pickled_tree)
    hostnode = crawled_tree.root_hartree.get_host_node_by_uuid(node_uuid)

    # One URL name per line, under a markdown heading.
    url_lines = '\n'.join(url.name for url in hostnode.urls)
    content = '''# URLs

{}
'''.format(url_lines)

    payload = BytesIO(content.encode())
    payload.seek(0)
    return send_file(payload, mimetype='text/markdown',
                     as_attachment=True, attachment_filename='file.md')
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/tree/hostname/<node_uuid>', methods=['GET'])
def hostnode_details(node_uuid):
    """Return the URLs of a host node as a JSON-encoded list.

    The tree is unpickled from the path stored in ``session["tree"]`` and the
    host node is looked up by *node_uuid*. Every URL that has a ``body_hash``
    is checked with ``lookyloo.sane_js_query``; a non-empty ``response`` is
    attached to the URL as the ``sane_js_details`` feature before the URL is
    serialised with ``to_json()``.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file it reads.
    # This presumably relies on session["tree"] only ever being set server-side
    # (see load_tree) -- confirm the session cannot be forged by a client.
    with open(session["tree"], 'rb') as f:
        ct = pickle.load(f)
    hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)

    urls = []
    for url in hostnode.urls:
        if hasattr(url, 'body_hash'):
            sane_js_r = lookyloo.sane_js_query(url.body_hash)
            if sane_js_r.get('response'):
                url.add_feature('sane_js_details', sane_js_r['response'])
                # Was a bare print() to stdout; route it through the
                # application logger so it obeys the logging configuration.
                app.logger.debug('SaneJS details: %s', url.sane_js_details)
        urls.append(url.to_json())
    return json.dumps(urls)
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/tree/url/<node_uuid>', methods=['GET'])
def urlnode_details(node_uuid):
    """Send the response body of a URL node as a zip attachment.

    The tree is unpickled from the path stored in ``session["tree"]`` and the
    URL node is looked up by *node_uuid*. When the node has a non-empty body
    it is zipped under the node's ``filename``; otherwise the archive holds a
    ``file.txt`` placeholder.
    """
    with open(session["tree"], 'rb') as pickled_tree:
        crawled_tree = pickle.load(pickled_tree)
    urlnode = crawled_tree.root_hartree.get_url_node_by_uuid(node_uuid)

    # Default to the placeholder entry; replaced below when a body exists.
    member_name, member_data = 'file.txt', b'Response body empty'
    if hasattr(urlnode, 'body'):
        body_content = urlnode.body.getvalue()
        if body_content:
            member_name, member_data = urlnode.filename, body_content

    archive = BytesIO()
    with ZipFile(archive, 'w', ZIP_DEFLATED) as zfile:
        zfile.writestr(member_name, member_data)
    # Rewind so send_file streams the archive from its first byte.
    archive.seek(0)
    return send_file(archive, mimetype='application/zip',
                     as_attachment=True, attachment_filename='file.zip')
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
def image(tree_uuid):
    """Send the image loaded for a tree as a PNG attachment.

    Replies with a short 'Not available.' text body when no report directory
    is found for *tree_uuid*.
    """
    report_dir = lookyloo.lookup_report_dir(tree_uuid)
    if report_dir:
        png = lookyloo.load_image(report_dir)
        return send_file(png, mimetype='image/png',
                         as_attachment=True, attachment_filename='image.png')
    return Response('Not available.', mimetype='text/text')
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/tree/<string:tree_uuid>', methods=['GET'])
def tree(tree_uuid):
    """Render the tree page for *tree_uuid*.

    Redirects to the index when no report directory is found, and renders the
    error page when loading or rendering raises ``NoValidHarFile``.
    """
    report_dir = lookyloo.lookup_report_dir(tree_uuid)
    if not report_dir:
        return redirect(url_for('index'))

    try:
        tree_json, start_time, user_agent, root_url, meta = load_tree(report_dir)
        context = {
            'tree_json': tree_json,
            'start_time': start_time,
            'user_agent': user_agent,
            'root_url': root_url,
            'tree_uuid': tree_uuid,
            'meta': meta,
        }
        return render_template('tree.html', **context)
    except NoValidHarFile as har_error:
        return render_template('error.html', error_message=har_error)
|
2019-01-30 14:30:01 +01:00
|
|
|
|
|
|
|
|
|
|
|
@app.route('/', methods=['GET'])
def index():
    """Render the index page listing the cached reports, newest first.

    Each request also cleans up old temporary files, refreshes the user agent
    list and clears the session. Reports without a cache entry, or whose cache
    entry contains ``no_index``, are left out of the listing.
    """
    if request.method == 'HEAD':
        # Just returns ack if the webserver is running
        return 'Ack'

    lookyloo.cleanup_old_tmpfiles()
    update_user_agents()
    session.clear()

    titles = []
    for report_dir in lookyloo.report_dirs:
        cached = lookyloo.report_cache(report_dir)
        if not cached or 'no_index' in cached:
            continue
        date, clock = cached['timestamp'].split('T')
        # Drop fractional seconds if present. partition() also copes with a
        # timestamp that has none, where `a, _ = s.split('.', 1)` would raise
        # ValueError and take the whole index page down.
        clock = clock.partition('.')[0]
        titles.append((cached['uuid'], cached['title'], date, clock, cached['url'], cached['redirects']))

    # Most recent capture first: order by (date, time), descending.
    titles.sort(key=lambda entry: (entry[2], entry[3]), reverse=True)
    return render_template('index.html', titles=titles)
|