chg: Refactor code organisation
@@ -5,9 +5,8 @@ from pathlib import Path
 import logging

 from lookyloo.abstractmanager import AbstractManager
-from lookyloo.helpers import get_homedir, get_socket_path
-from lookyloo import scrape
-from redis import Redis
+from lookyloo.helpers import get_homedir
+from lookyloo.lookyloo import Lookyloo

 logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                     level=logging.INFO, datefmt='%I:%M:%S')

@@ -19,15 +18,10 @@ class AsyncScraper(AbstractManager):
         super().__init__(loglevel)
         if not storage_directory:
             self.storage_directory = get_homedir() / 'scraped'
-        self.redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
+        self.lookyloo = Lookyloo(loglevel=loglevel)

     def _to_run_forever(self):
-        uuid = self.redis.spop('to_scrape')
-        if not uuid:
-            return
-        to_scrape = self.redis.hgetall(uuid)
-        to_scrape['perma_uuid'] = uuid
-        scrape(**to_scrape)
+        self.lookyloo.process_scrape_queue()


 if __name__ == '__main__':

@@ -10,4 +10,5 @@ if __name__ == '__main__':
     get_homedir()
     p = Popen(['run_backend.py', '--start'])
     p.wait()
+    Popen(['async_scrape.py'])
     Popen(['start_website.py'])

@@ -5,10 +5,10 @@ from subprocess import Popen
 from lookyloo.helpers import get_homedir

 if __name__ == '__main__':
-    website_dir = get_homedir()
+    website_dir = get_homedir() / 'website'
     Popen([str(website_dir / '3rdparty.sh')], cwd=website_dir)
     try:
-        Popen(['gunicorn', '--worker-class', 'eventlet', '-w', '10', '-b', '0.0.0.0:5100', 'lookyloo:app'],
+        Popen(['gunicorn', '--worker-class', 'eventlet', '-w', '10', '-b', '0.0.0.0:5100', 'web:app'],
               cwd=website_dir).communicate()
     except KeyboardInterrupt:
         print('Stopping gunicorn.')

lookyloo.ini
@@ -1,13 +0,0 @@
-[uwsgi]
-module = wsgi:app
-
-master = true
-processes = 5
-
-socket = lookyloo.sock
-chmod-socket = 660
-vacuum = true
-
-die-on-term = true
-
-wsgi-disable-file-wrapper = true

@@ -1,264 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import json
-
-from har2tree import CrawledTree
-from scrapysplashwrapper import crawl
-
-from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response
-from flask_bootstrap import Bootstrap
-
-from datetime import datetime
-
-import pickle
-import tempfile
-import pathlib
-import time
-
-from zipfile import ZipFile, ZIP_DEFLATED
-from io import BytesIO
-import base64
-import os
-from uuid import uuid4
-
-from pysanejs import SaneJS
-
-from .helpers import get_homedir, get_socket_path
-from redis import Redis
-
-app = Flask(__name__)
-
-
-secret_file_path = get_homedir() / 'secret_key'
-
-if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
-    with secret_file_path.open('wb') as f:
-        f.write(os.urandom(64))
-
-with secret_file_path.open('rb') as f:
-    app.config['SECRET_KEY'] = f.read()
-
-Bootstrap(app)
-app.config['BOOTSTRAP_SERVE_LOCAL'] = True
-app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
-app.debug = False
-
-HAR_DIR = get_homedir() / 'scraped'
-HAR_DIR.mkdir(parents=True, exist_ok=True)
-
-SPLASH = 'http://127.0.0.1:8050'
-SANE_JS = 'http://127.0.0.1:5007'
-
-if SANE_JS:
-    try:
-        sanejs = SaneJS(SANE_JS)
-        if sanejs.is_up:
-            has_sane_js = True
-        else:
-            has_sane_js = False
-    except Exception:
-        has_sane_js = False
-
-r = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
-
-
-def get_report_dirs():
-    # Cleanup HAR_DIR of failed runs.
-    for report_dir in HAR_DIR.iterdir():
-        if report_dir.is_dir() and not report_dir.iterdir():
-            report_dir.rmdir()
-        if not (report_dir / 'uuid').exists():
-            # Create uuid if missing
-            with (report_dir / 'uuid').open('w') as f:
-                f.write(str(uuid4()))
-    return sorted(HAR_DIR.iterdir(), reverse=True)
-
-
-def get_lookup_dirs():
-    # Build lookup table trees
-    lookup_dirs = {}
-    for report_dir in get_report_dirs():
-        with (report_dir / 'uuid').open() as f:
-            lookup_dirs[f.read().strip()] = report_dir
-    return lookup_dirs
-
-
-def cleanup_old_tmpfiles():
-    for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):
-        if time.time() - tmpfile.stat().st_atime > 36000:
-            tmpfile.unlink()
-
-
-def load_image(report_dir):
-    with open(list(report_dir.glob('*.png'))[0], 'rb') as f:
-        return BytesIO(f.read())
-
-
-def load_tree(report_dir):
-    session.clear()
-    har_files = sorted(report_dir.glob('*.har'))
-    ct = CrawledTree(har_files)
-    ct.find_parents()
-    ct.join_trees()
-    temp = tempfile.NamedTemporaryFile(prefix='lookyloo', delete=False)
-    pickle.dump(ct, temp)
-    temp.close()
-    session["tree"] = temp.name
-    return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url
-
-
-def sane_js_query(sha512: str):
-    if has_sane_js:
-        return sanejs.sha512(sha512)
-    return {'response': []}
-
-
-def scrape(url, depth: int=1, user_agent: str=None, perma_uuid: str=None):
-    if not url.startswith('http'):
-        url = f'http://{url}'
-    items = crawl(SPLASH, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
-    if not items:
-        # broken
-        pass
-    if not perma_uuid:
-        perma_uuid = str(uuid4())
-    width = len(str(len(items)))
-    dirpath = HAR_DIR / datetime.now().isoformat()
-    dirpath.mkdir()
-    for i, item in enumerate(items):
-        harfile = item['har']
-        png = base64.b64decode(item['png'])
-        child_frames = item['childFrames']
-        html = item['html']
-        with (dirpath / '{0:0{width}}.har'.format(i, width=width)).open('w') as f:
-            json.dump(harfile, f)
-        with (dirpath / '{0:0{width}}.png'.format(i, width=width)).open('wb') as f:
-            f.write(png)
-        with (dirpath / '{0:0{width}}.html'.format(i, width=width)).open('w') as f:
-            f.write(html)
-        with (dirpath / '{0:0{width}}.frames.json'.format(i, width=width)).open('w') as f:
-            json.dump(child_frames, f)
-    with (dirpath / 'uuid').open('w') as f:
-        f.write(perma_uuid)
-    return perma_uuid
-
-
-@app.route('/submit', methods=['POST', 'GET'])
-def submit():
-    to_query = request.get_json(force=True)
-    perma_uuid = str(uuid4())
-    p = r.pipeline()
-    p.hmset(perma_uuid, to_query)
-    p.sadd('to_scrape', perma_uuid)
-    p.execute()
-    return Response(perma_uuid, mimetype='text/text')
-
-
-@app.route('/scrape', methods=['GET', 'POST'])
-def scrape_web():
-    if request.form.get('url'):
-        perma_uuid = scrape(request.form.get('url'), request.form.get('depth'))
-        return redirect(url_for('tree', tree_uuid=perma_uuid))
-    return render_template('scrape.html')
-
-
-@app.route('/tree/hostname/<node_uuid>/text', methods=['GET'])
-def hostnode_details_text(node_uuid):
-    with open(session["tree"], 'rb') as f:
-        ct = pickle.load(f)
-    hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
-    urls = []
-    for url in hostnode.urls:
-        urls.append(url.name)
-    content = '''# URLs
-
-{}
-'''.format('\n'.join(urls))
-    to_return = BytesIO(content.encode())
-    to_return.seek(0)
-    return send_file(to_return, mimetype='text/markdown',
-                     as_attachment=True, attachment_filename='file.md')
-
-
-@app.route('/tree/hostname/<node_uuid>', methods=['GET'])
-def hostnode_details(node_uuid):
-    with open(session["tree"], 'rb') as f:
-        ct = pickle.load(f)
-    hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
-    urls = []
-    for url in hostnode.urls:
-        if hasattr(url, 'body_hash'):
-            sane_js_r = sane_js_query(url.body_hash)
-            if sane_js_r.get('response'):
-                url.add_feature('sane_js_details', sane_js_r['response'])
-                print('######## SANEJS ##### ', url.sane_js_details)
-        urls.append(url.to_json())
-    return json.dumps(urls)
-
-
-@app.route('/tree/url/<node_uuid>', methods=['GET'])
-def urlnode_details(node_uuid):
-    with open(session["tree"], 'rb') as f:
-        ct = pickle.load(f)
-    urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
-    to_return = BytesIO()
-    got_content = False
-    if hasattr(urlnode, 'body'):
-        body_content = urlnode.body.getvalue()
-        if body_content:
-            got_content = True
-            with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
-                zfile.writestr(urlnode.filename, urlnode.body.getvalue())
-    if not got_content:
-        with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
-            zfile.writestr('file.txt', b'Response body empty')
-    to_return.seek(0)
-    return send_file(to_return, mimetype='application/zip',
-                     as_attachment=True, attachment_filename='file.zip')
-
-
-@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
-def image(tree_uuid):
-    lookup_dirs = get_lookup_dirs()
-    report_dir = lookup_dirs[tree_uuid]
-    to_return = load_image(report_dir)
-    return send_file(to_return, mimetype='image/png',
-                     as_attachment=True, attachment_filename='image.png')
-
-
-@app.route('/tree/<string:tree_uuid>', methods=['GET'])
-def tree(tree_uuid):
-    lookup_dirs = get_lookup_dirs()
-    report_dir = lookup_dirs[tree_uuid]
-    tree_json, start_time, user_agent, root_url = load_tree(report_dir)
-    return render_template('tree.html', tree_json=tree_json, start_time=start_time,
-                           user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid)
-
-
-@app.route('/', methods=['GET'])
-def index():
-    if request.method == 'HEAD':
-        # Just returns ack if the webserver is running
-        return 'Ack'
-    cleanup_old_tmpfiles()
-    session.clear()
-    titles = []
-    if not HAR_DIR.exists():
-        HAR_DIR.mkdir(parents=True)
-    for report_dir in get_report_dirs():
-        har_files = sorted(report_dir.glob('*.har'))
-        if not har_files:
-            continue
-        with har_files[0].open() as f:
-            j = json.load(f)
-            title = j['log']['pages'][0]['title']
-        with (report_dir / 'uuid').open() as f:
-            uuid = f.read().strip()
-        titles.append((uuid, title))
-
-    return render_template('index.html', titles=titles)
-
-
-if __name__ == '__main__':
-    app.run(port=5001, threaded=True)

@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import json
+
+from scrapysplashwrapper import crawl
+from har2tree import CrawledTree
+import pickle
+
+from datetime import datetime
+
+import tempfile
+import pathlib
+import time
+
+from io import BytesIO
+import base64
+from uuid import uuid4
+
+from pysanejs import SaneJS
+
+from pathlib import Path
+from .helpers import get_homedir, get_socket_path
+from redis import Redis
+
+import logging
+
+
+class Lookyloo():
+
+    def __init__(self, splash_url: str='http://127.0.0.1:8050', loglevel: int=logging.DEBUG):
+        self.__init_logger(loglevel)
+        self.redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
+        self.scrape_dir = get_homedir() / 'scraped'
+        self.splash_url = splash_url
+        if not self.scrape_dir.exists():
+            self.scrape_dir.mkdir(parents=True, exist_ok=True)
+
+        # Try to reach sanejs
+        self.sanejs = SaneJS()
+        if not self.sanejs.is_up:
+            self.sanejs = None
+
+    def __init_logger(self, loglevel) -> None:
+        self.logger = logging.getLogger(f'{self.__class__.__name__}')
+        self.logger.setLevel(loglevel)
+
+    @property
+    def report_dirs(self):
+        for report_dir in self.scrape_dir.iterdir():
+            if report_dir.is_dir() and not report_dir.iterdir():
+                # Cleanup self.scrape_dir of failed runs.
+                report_dir.rmdir()
+            if not (report_dir / 'uuid').exists():
+                # Create uuid if missing
+                with (report_dir / 'uuid').open('w') as f:
+                    f.write(str(uuid4()))
+        return sorted(self.scrape_dir.iterdir(), reverse=True)
+
+    @property
+    def lookup_dirs(self):
+        # Build lookup table trees
+        lookup_dirs = {}
+        for report_dir in self.report_dirs:
+            with (report_dir / 'uuid').open() as f:
+                lookup_dirs[f.read().strip()] = report_dir
+        return lookup_dirs
+
+    def enqueue_scrape(self, query: dict):
+        perma_uuid = str(uuid4())
+        p = self.redis.pipeline()
+        p.hmset(perma_uuid, query)
+        p.sadd('to_scrape', perma_uuid)
+        p.execute()
+        return perma_uuid
+
+    def process_scrape_queue(self):
+        uuid = self.redis.spop('to_scrape')
+        if not uuid:
+            return
+        to_scrape = self.redis.hgetall(uuid)
+        self.redis.delete(uuid)
+        to_scrape['perma_uuid'] = uuid
+        self.scrape(**to_scrape)
+
+    def load_tree(self, report_dir: Path):
+        har_files = sorted(report_dir.glob('*.har'))
+        ct = CrawledTree(har_files)
+        ct.find_parents()
+        ct.join_trees()
+        temp = tempfile.NamedTemporaryFile(prefix='lookyloo', delete=False)
+        pickle.dump(ct, temp)
+        temp.close()
+        return temp.name, ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url
+
+    def cleanup_old_tmpfiles(self):
+        for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):
+            if time.time() - tmpfile.stat().st_atime > 36000:
+                tmpfile.unlink()
+
+    def load_image(self, report_dir):
+        with open(list(report_dir.glob('*.png'))[0], 'rb') as f:
+            return BytesIO(f.read())
+
+    def sane_js_query(self, sha512: str):
+        if self.sanejs:
+            return self.sanejs.sha512(sha512)
+        return {'response': []}
+
+    def scrape(self, url, depth: int=1, user_agent: str=None, perma_uuid: str=None):
+        if not url.startswith('http'):
+            url = f'http://{url}'
+        items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
+        if not items:
+            # broken
+            pass
+        if not perma_uuid:
+            perma_uuid = str(uuid4())
+        width = len(str(len(items)))
+        dirpath = self.scrape_dir / datetime.now().isoformat()
+        dirpath.mkdir()
+        for i, item in enumerate(items):
+            harfile = item['har']
+            png = base64.b64decode(item['png'])
+            child_frames = item['childFrames']
+            html = item['html']
+            with (dirpath / '{0:0{width}}.har'.format(i, width=width)).open('w') as f:
+                json.dump(harfile, f)
+            with (dirpath / '{0:0{width}}.png'.format(i, width=width)).open('wb') as f:
+                f.write(png)
+            with (dirpath / '{0:0{width}}.html'.format(i, width=width)).open('w') as f:
+                f.write(html)
+            with (dirpath / '{0:0{width}}.frames.json'.format(i, width=width)).open('w') as f:
+                json.dump(child_frames, f)
+        with (dirpath / 'uuid').open('w') as f:
+            f.write(perma_uuid)
+        return perma_uuid

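Not part of the diff: a minimal sketch of how the refactored pieces are meant to fit together, assuming a Redis cache socket and a Splash instance reachable at the default URL used above. The web frontend only enqueues a capture request; the async scraper drains the queue and runs the actual crawl.

    from lookyloo.lookyloo import Lookyloo

    lookyloo = Lookyloo()
    # The web process stores the request in Redis and hands back a permanent UUID...
    perma_uuid = lookyloo.enqueue_scrape({'url': 'http://example.com', 'depth': 1})
    # ...and the async scraper later pops one queued request and performs the Splash crawl.
    lookyloo.process_scrape_queue()
    # Once scraped, the report directory can be resolved from the same UUID.
    report_dir = lookyloo.lookup_dirs.get(perma_uuid)
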
@@ -5,8 +5,8 @@ set -x

 mkdir -p web/static/

-wget https://d3js.org/d3.v5.js -O lookyloo/static/d3.v5.js
+wget https://d3js.org/d3.v5.js -O web/static/d3.v5.js

 FileSaver="5733e40e5af936eb3f48554cf6a8a7075d71d18a"

-wget https://cdn.rawgit.com/eligrey/FileSaver.js/${FileSaver}/FileSaver.js -O lookyloo/static/FileSaver.js
+wget https://cdn.rawgit.com/eligrey/FileSaver.js/${FileSaver}/FileSaver.js -O web/static/FileSaver.js

@ -0,0 +1,151 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
from zipfile import ZipFile, ZIP_DEFLATED
|
||||||
|
from io import BytesIO
|
||||||
|
import os
|
||||||
|
|
||||||
|
from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response
|
||||||
|
from flask_bootstrap import Bootstrap
|
||||||
|
|
||||||
|
from lookyloo.helpers import get_homedir
|
||||||
|
from lookyloo.lookyloo import Lookyloo
|
||||||
|
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
secret_file_path = get_homedir() / 'secret_key'
|
||||||
|
|
||||||
|
if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
|
||||||
|
with secret_file_path.open('wb') as f:
|
||||||
|
f.write(os.urandom(64))
|
||||||
|
|
||||||
|
with secret_file_path.open('rb') as f:
|
||||||
|
app.config['SECRET_KEY'] = f.read()
|
||||||
|
|
||||||
|
Bootstrap(app)
|
||||||
|
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
|
||||||
|
app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
|
||||||
|
app.debug = False
|
||||||
|
|
||||||
|
lookyloo = Lookyloo()
|
||||||
|
|
||||||
|
|
||||||
|
# keep
|
||||||
|
def load_tree(report_dir):
|
||||||
|
session.clear()
|
||||||
|
temp_file_name, tree_json, tree_time, tree_ua, tree_root_url = lookyloo.load_tree(report_dir)
|
||||||
|
session["tree"] = temp_file_name
|
||||||
|
return tree_json, tree_time, tree_ua, tree_root_url
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/submit', methods=['POST', 'GET'])
|
||||||
|
def submit():
|
||||||
|
to_query = request.get_json(force=True)
|
||||||
|
perma_uuid = lookyloo.enqueue_scrape(to_query)
|
||||||
|
return Response(perma_uuid, mimetype='text/text')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/scrape', methods=['GET', 'POST'])
|
||||||
|
def scrape_web():
|
||||||
|
if request.form.get('url'):
|
||||||
|
perma_uuid = lookyloo.scrape(request.form.get('url'), request.form.get('depth'))
|
||||||
|
return redirect(url_for('tree', tree_uuid=perma_uuid))
|
||||||
|
return render_template('scrape.html')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/tree/hostname/<node_uuid>/text', methods=['GET'])
|
||||||
|
def hostnode_details_text(node_uuid):
|
||||||
|
with open(session["tree"], 'rb') as f:
|
||||||
|
ct = pickle.load(f)
|
||||||
|
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
|
||||||
|
urls = []
|
||||||
|
for url in hostnode.urls:
|
||||||
|
urls.append(url.name)
|
||||||
|
content = '''# URLs
|
||||||
|
|
||||||
|
{}
|
||||||
|
'''.format('\n'.join(urls))
|
||||||
|
to_return = BytesIO(content.encode())
|
||||||
|
to_return.seek(0)
|
||||||
|
return send_file(to_return, mimetype='text/markdown',
|
||||||
|
as_attachment=True, attachment_filename='file.md')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/tree/hostname/<node_uuid>', methods=['GET'])
|
||||||
|
def hostnode_details(node_uuid):
|
||||||
|
with open(session["tree"], 'rb') as f:
|
||||||
|
ct = pickle.load(f)
|
||||||
|
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
|
||||||
|
urls = []
|
||||||
|
for url in hostnode.urls:
|
||||||
|
if hasattr(url, 'body_hash'):
|
||||||
|
sane_js_r = lookyloo.sane_js_query(url.body_hash)
|
||||||
|
if sane_js_r.get('response'):
|
||||||
|
url.add_feature('sane_js_details', sane_js_r['response'])
|
||||||
|
print('######## SANEJS ##### ', url.sane_js_details)
|
||||||
|
urls.append(url.to_json())
|
||||||
|
return json.dumps(urls)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/tree/url/<node_uuid>', methods=['GET'])
|
||||||
|
def urlnode_details(node_uuid):
|
||||||
|
with open(session["tree"], 'rb') as f:
|
||||||
|
ct = pickle.load(f)
|
||||||
|
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
|
||||||
|
to_return = BytesIO()
|
||||||
|
got_content = False
|
||||||
|
if hasattr(urlnode, 'body'):
|
||||||
|
body_content = urlnode.body.getvalue()
|
||||||
|
if body_content:
|
||||||
|
got_content = True
|
||||||
|
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
||||||
|
zfile.writestr(urlnode.filename, urlnode.body.getvalue())
|
||||||
|
if not got_content:
|
||||||
|
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
||||||
|
zfile.writestr('file.txt', b'Response body empty')
|
||||||
|
to_return.seek(0)
|
||||||
|
return send_file(to_return, mimetype='application/zip',
|
||||||
|
as_attachment=True, attachment_filename='file.zip')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
|
||||||
|
def image(tree_uuid):
|
||||||
|
lookup_dirs = lookyloo.lookup_dirs
|
||||||
|
report_dir = lookup_dirs[tree_uuid]
|
||||||
|
to_return = lookyloo.load_image(report_dir)
|
||||||
|
return send_file(to_return, mimetype='image/png',
|
||||||
|
as_attachment=True, attachment_filename='image.png')
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/tree/<string:tree_uuid>', methods=['GET'])
|
||||||
|
def tree(tree_uuid):
|
||||||
|
lookup_dirs = lookyloo.lookup_dirs
|
||||||
|
report_dir = lookup_dirs[tree_uuid]
|
||||||
|
tree_json, start_time, user_agent, root_url = load_tree(report_dir)
|
||||||
|
return render_template('tree.html', tree_json=tree_json, start_time=start_time,
|
||||||
|
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['GET'])
|
||||||
|
def index():
|
||||||
|
if request.method == 'HEAD':
|
||||||
|
# Just returns ack if the webserver is running
|
||||||
|
return 'Ack'
|
||||||
|
lookyloo.cleanup_old_tmpfiles()
|
||||||
|
session.clear()
|
||||||
|
titles = []
|
||||||
|
for report_dir in lookyloo.report_dirs:
|
||||||
|
har_files = sorted(report_dir.glob('*.har'))
|
||||||
|
if not har_files:
|
||||||
|
continue
|
||||||
|
with har_files[0].open() as f:
|
||||||
|
j = json.load(f)
|
||||||
|
title = j['log']['pages'][0]['title']
|
||||||
|
with (report_dir / 'uuid').open() as f:
|
||||||
|
uuid = f.read().strip()
|
||||||
|
titles.append((uuid, title))
|
||||||
|
|
||||||
|
return render_template('index.html', titles=titles)
|
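For illustration only (not part of the diff), assuming the gunicorn bind from the start script above (0.0.0.0:5100): a capture could be queued against the new /submit route roughly like this; the host and payload values are assumptions.

    import requests

    # submit() reads the JSON body with request.get_json(force=True) and returns
    # the permanent UUID of the queued capture as plain text.
    r = requests.post('http://127.0.0.1:5100/submit',
                      json={'url': 'http://example.com', 'depth': 1})
    perma_uuid = r.text
    # Once the async scraper has processed the queue, the capture is browsable at /tree/<perma_uuid>.
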
[17 binary image files: before/after sizes identical (paths changed, content unchanged)]