mirror of https://github.com/CIRCL/lookyloo
Add initial web interface
parent
eb71d7a3eb
commit
f7f1abc9e4
|
@ -1,3 +1,8 @@
|
||||||
|
# Local exclude
|
||||||
|
scraped/
|
||||||
|
*.swp
|
||||||
|
lookyloo/ete3_webserver/webapi.py
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
46
README.md
46
README.md
|
@ -1,2 +1,44 @@
|
||||||
# lookyloo
|
Lookyloo
|
||||||
*Lookyloo* is a web interface allowing to scrape a website and then displays a tree of domains calling each other.
|
========
|
||||||
|
|
||||||
|
*Lookyloo* is a web interface that lets you scrape a website and then displays a
|
||||||
|
tree of domains calling each other.
|
||||||
|
|
||||||
|
|
||||||
|
What is that name?!
|
||||||
|
===================
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
|
1. People who just come to look.
|
||||||
|
2. People who go out of their way to look at people or something often causing crowds and more disruption.
|
||||||
|
3. People who enjoy staring at watching other peoples misfortune. Oftentimes car onlookers to car accidents.
|
||||||
|
Same as Looky Lou; often spelled as Looky-loo (hyphen) or lookylou
|
||||||
|
In L.A. usually the lookyloo's cause more accidents by not paying full attention to what is ahead of them.
|
||||||
|
```
|
||||||
|
|
||||||
|
Source: Urban Dictionary
|
||||||
|
|
||||||
|
|
||||||
|
Implementation details
|
||||||
|
======================
|
||||||
|
|
||||||
|
This code is very heavily inspired by https://github.com/etetoolkit/webplugin and adapted to use Flask as the backend.
|
||||||
|
|
||||||
|
Installation of har2tree
|
||||||
|
========================
|
||||||
|
|
||||||
|
The core dependency is ETE Toolkit, which you can install following the guide
|
||||||
|
on the official website: http://etetoolkit.org/download/
|
||||||
|
|
||||||
|
Protip
|
||||||
|
======
|
||||||
|
|
||||||
|
If you like using virtualenv and have `pew` installed, you can also do it this way:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo apt-get install python-qt4
|
||||||
|
pip install -r requirements.txt
|
||||||
|
pew toggleglobalsitepackages # PyQt4 is not easily installable in a virtualenv
|
||||||
|
pip install -e .
|
||||||
|
```
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from har2tree import CrawledTree, hostname_treestyle
|
||||||
|
from scrapysplashwrapper import crawl
|
||||||
|
from ete3_webserver import NodeActions, WebTreeHandler
|
||||||
|
|
||||||
|
from flask import Flask, render_template, request
|
||||||
|
from flask_bootstrap import Bootstrap
|
||||||
|
|
||||||
|
from glob import glob
|
||||||
|
import os
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Flask application object that every route in this module is registered on.
app = Flask(__name__)

# Attach the Flask-Bootstrap extension to the app, then ask it to serve its
# static resources from this application instead of an external host.
Bootstrap(app)
app.config.update(BOOTSTRAP_SERVE_LOCAL=True)
# NOTE(review): debug mode is hard-coded on -- confirm this module is only
# ever run on a development machine.
app.debug = True

# Directory (relative to the working directory) that holds one
# sub-directory of HAR files per scrape.
HAR_DIR = 'scraped'
# Base URL of the Splash instance handed to the crawler.
SPLASH = 'http://127.0.0.1:8050'
|
||||||
|
|
||||||
|
|
||||||
|
def load_tree(report_dir):
    """Build the web tree handler for one stored scrape.

    Every ``*.har`` file found in ``HAR_DIR/<report_dir>`` is passed, in
    sorted filename order, to ``CrawledTree``. The crawled tree is then
    resolved (``find_parents``, ``join_trees``), its hostname tree is
    generated, and that tree is wrapped in a ``WebTreeHandler``.

    :param report_dir: name of the sub-directory of ``HAR_DIR`` to load.
    :return: a ``WebTreeHandler`` around ``root_hartree.hostname_tree``,
        created with a fresh ``NodeActions`` and ``hostname_treestyle()``.
    """
    pattern = os.path.join(HAR_DIR, report_dir, '*.har')
    crawled = CrawledTree(sorted(glob(pattern)))
    crawled.find_parents()
    crawled.join_trees()
    crawled.root_hartree.make_hostname_tree()

    node_actions = NodeActions()
    tree_style = hostname_treestyle()
    hostname_tree = crawled.root_hartree.hostname_tree
    return WebTreeHandler(hostname_tree, node_actions, tree_style)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/scrap', methods=['GET', 'POST'])
def scrap():
    """Show the scrape form, or run a scrape and display the resulting tree.

    When the submitted form carries a ``url``, that URL is crawled through
    the Splash instance at ``SPLASH``. The ``har`` entry of every returned
    item is written as JSON to a new ``HAR_DIR/<ISO timestamp>/`` directory,
    using zero-padded sequential file names (``1.har``, or ``01.har`` ...
    when there are ten or more items), and the tree view is rendered.

    When there is no ``url`` in the form, or the crawl returned nothing, the
    scrape form is rendered instead and nothing is written to disk.

    :return: the rendered ``tree.html`` response for the new scrape, or the
        rendered ``scrap.html`` form.
    """
    url = request.form.get('url')
    if not url:
        return render_template('scrap.html')

    # NOTE(review): ``depth`` is forwarded exactly as submitted (a string, or
    # None when the field is absent) -- confirm crawl() accepts that.
    depth = request.form.get('depth')
    items = crawl(SPLASH, url, depth)
    if not items:
        # Nothing to store: bail out before creating an empty report
        # directory, which tree(-1) below could not load.
        return render_template('scrap.html')

    # Zero-pad the file names so that sorting them by name keeps crawl order.
    width = len(str(len(items)))
    dirpath = os.path.join(HAR_DIR, datetime.now().isoformat())
    os.makedirs(dirpath)
    for i, item in enumerate(items, start=1):
        filename = '{0:0{width}}.har'.format(i, width=width)
        with open(os.path.join(dirpath, filename), 'w') as f:
            json.dump(item['har'], f)

    # -1 selects the last entry of the sorted HAR_DIR listing.
    # NOTE(review): that is the directory just created only if every entry
    # in HAR_DIR is named by an ISO timestamp -- confirm.
    return tree(-1)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/tree/<int:tree_id>', methods=['GET'])
def tree(tree_id):
    """Render the tree page for one stored scrape.

    :param tree_id: index into the sorted listing of ``HAR_DIR``; it is used
        as a plain list index, so an out-of-range value raises ``IndexError``.
    :return: the rendered ``tree.html`` template, given the node map, the
        face map and the base64-encoded image produced by ``redraw()``.
    """
    report_dirs = sorted(os.listdir(HAR_DIR))
    handler = load_tree(report_dirs[tree_id])
    nodes, faces, encoded_img = handler.redraw()
    return render_template('tree.html', nodes=nodes, faces=faces,
                           base64_img=encoded_img)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/', methods=['GET'])
def index():
    """List every stored scrape that contains at least one HAR file.

    For each sub-directory of ``HAR_DIR`` (in sorted order) the first HAR
    file is opened and the title of its first page is read. Directories
    without any ``*.har`` file are left out of the list.

    :return: the rendered ``index.html`` template, given ``titles`` as a list
        of ``(tree_id, page_title)`` tuples.
    """
    titles = []
    # The id must be the position in the *full* sorted listing, because that
    # is what tree() indexes into. Counting only the non-empty directories
    # would shift every id that follows an empty directory.
    for tree_id, report_dir in enumerate(sorted(os.listdir(HAR_DIR))):
        har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
        if not har_files:
            continue
        with open(har_files[0], 'r') as f:
            har = json.load(f)
        titles.append((tree_id, har['log']['pages'][0]['title']))

    return render_template('index.html', titles=titles)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Executed as a script: start Flask's built-in server on port 5001.
    app.run(port=5001)
|
|
@ -0,0 +1,3 @@
|
||||||
|
from .tree_handler import WebTreeHandler, NodeActions
|
||||||
|
|
||||||
|
__all__ = ['WebTreeHandler', 'NodeActions']
|
|
@ -0,0 +1,90 @@
|
||||||
|
import time
|
||||||
|
import string
|
||||||
|
import random
|
||||||
|
# import logging as log
|
||||||
|
from ete3 import Tree # , TreeStyle
|
||||||
|
from ete3.parser.newick import NewickError
|
||||||
|
|
||||||
|
|
||||||
|
def timeit(f):
    """Decorator that prints how long each call to ``f`` took.

    The wrapper forwards all positional and keyword arguments to ``f``,
    prints the elapsed time in seconds together with ``f.__name__`` and
    returns whatever ``f`` returned. If ``f`` raises, the exception
    propagates and nothing is printed.

    :param f: the callable to time.
    :return: a wrapper around ``f`` that keeps ``f``'s name and docstring.
    """
    # Imported here so the decorator works without touching the module's
    # import block.
    import functools

    @functools.wraps(f)  # keep f's __name__/__doc__ on the wrapper
    def a_wrapper_accepting_arguments(*args, **kargs):
        # perf_counter() is monotonic, so system clock adjustments cannot
        # distort the measured interval.
        t1 = time.perf_counter()
        r = f(*args, **kargs)
        print(" %0.3f secs: %s" % (time.perf_counter() - t1, f.__name__))
        return r
    return a_wrapper_accepting_arguments
|
||||||
|
|
||||||
|
|
||||||
|
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    :param size: number of characters to generate.
    :param chars: sequence of candidate characters; uppercase ASCII letters
        and digits by default.
    :return: the generated string (empty when ``size`` is 0).
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
|
||||||
|
|
||||||
|
|
||||||
|
class WebTreeHandler(object):
    """Hold an ete3 ``Tree`` plus the actions and style used to serve it.

    The handler stores the tree on ``self.tree``, attaches the given actions
    and tree style to it, and numbers every node so that it can be looked up
    later by id.
    """

    def __init__(self, newick, actions, style):
        """Load the tree and prepare it for rendering.

        :param newick: an existing ``Tree`` instance, or a value passed to
            ``Tree()``. If the default parse raises ``NewickError`` the load
            is retried once with ``format=1``.
        :param actions: stored on the tree as ``tree.actions``.
        :param style: stored on the tree as ``tree.tree_style``.
        """
        if isinstance(newick, Tree):
            loaded = newick
        else:
            try:
                loaded = Tree(newick)
            except NewickError:
                loaded = Tree(newick, format=1)
        self.tree = loaded

        loaded.actions = actions
        loaded.tree_style = style

        # Initialize node internal IDs
        for position, node in enumerate(loaded.traverse('preorder')):
            node._nid = position

    @timeit
    def redraw(self):
        """Render the tree and return what the page template needs.

        :return: ``(nodes, faces, base64)``: the two lists built by
            ``get_html_map`` from the render's image map, and the decoded
            ``data()`` of the rendered image object.
        """
        rendered = self.tree.render("%%return.PNG", tree_style=self.tree.tree_style)
        base64_img, img_map = rendered
        nodes, faces = self.get_html_map(img_map)
        return nodes, faces, base64_img.data().decode()

    @staticmethod
    def _with_node_areas(img_map, key):
        """Return the entries of ``img_map[key]``, each extended with its node area.

        A missing or empty ``img_map[key]`` gives an empty list. The area is
        looked up in ``img_map["node_areas"]`` by ``int(nodeid)`` and falls
        back to ``[0, 0, 0, 0]``.
        """
        entries = img_map.get(key)
        if not entries:
            return []
        return [
            [x1, y1, x2, y2, nodeid, text,
             img_map["node_areas"].get(int(nodeid), [0, 0, 0, 0])]
            for x1, y1, x2, y2, nodeid, text in entries
        ]

    def get_html_map(self, img_map):
        """Turn a render image map into the lists used to build the HTML map.

        :param img_map: mapping that may hold ``"nodes"`` and ``"faces"``
            lists of ``(x1, y1, x2, y2, nodeid, text)`` entries, plus a
            ``"node_areas"`` mapping keyed by integer node id.
        :return: ``(nodes, faces)``, two lists of
            ``[x1, y1, x2, y2, nodeid, text, area]`` entries.
        """
        nodes = self._with_node_areas(img_map, "nodes")
        faces = self._with_node_areas(img_map, "faces")
        return nodes, faces

    def get_avail_actions(self, nodeid):
        """List the actions whose ``show_fn`` accepts the given node.

        :param nodeid: internal node id (converted with ``int``).
        :return: list of ``[action_index, action_name]`` pairs.
        """
        target = self.tree.search_nodes(_nid=int(nodeid))[0]
        return [
            [aindex, aname]
            for aindex, aname, show_fn, _run_fn in self.tree.actions
            if show_fn(target)
        ]

    def run_action(self, aindex, nodeid):
        """Run the action registered under ``aindex`` on the given node.

        :param aindex: key of the action in ``self.tree.actions.actions``.
        :param nodeid: internal node id (converted with ``int``).
        :return: whatever the action's run function returns.
        """
        target = self.tree.search_nodes(_nid=int(nodeid))[0]
        action = self.tree.actions.actions[aindex]
        # Each entry is stored as [action_name, show_fn, run_fn].
        return action[2](self.tree, target)
|
||||||
|
|
||||||
|
|
||||||
|
class NodeActions(object):
    """Registry of node actions, keyed by a generated action index.

    Each entry of ``self.actions`` maps an index string to the list
    ``[action_name, show_fn, run_fn]``.
    """

    def __init__(self):
        """Start with an empty registry."""
        self.actions = {}

    def __iter__(self):
        """Yield ``(aindex, aname, show_fn, run_fn)`` for every action."""
        for aindex, entry in self.actions.items():
            aname, show_fn, run_fn = entry
            yield (aindex, aname, show_fn, run_fn)

    def __str__(self):
        """Return one ``index: name, show_fn, run_fn`` line per action."""
        return '\n'.join("%s: %s, %s, %s" % entry for entry in self)

    def clear_default_actions(self):
        """Drop every registered action."""
        self.actions = {}

    def add_action(self, action_name, show_fn, run_fn):
        """Register an action under a new ``act_``-prefixed random index.

        :param action_name: label of the action.
        :param show_fn: callable stored as the action's show function.
        :param run_fn: callable stored as the action's run function.
        """
        key = "act_" + id_generator()
        self.actions[key] = [action_name, show_fn, run_fn]
|
|
@ -0,0 +1,8 @@
|
||||||
|
#highlighter {
|
||||||
|
position: absolute;
|
||||||
|
visibility: visible;
|
||||||
|
z-index:100;
|
||||||
|
top:0; left:0;
|
||||||
|
width: 70px; height: 70px;
|
||||||
|
border: 2px solid indianred;
|
||||||
|
}
|
|
@ -0,0 +1,19 @@
|
||||||
|
/**
 * Show the #highlighter box over a rectangle of the #img element.
 *
 * x and y are added to the page offset of #img; the box is placed one
 * pixel up and to the left of that point and is made one pixel larger
 * than width/height.
 */
function highlight_node(x, y, width, height){
    var imgOffset = $('#img').offset();
    var box = $("#highlighter");

    box.show();
    box.css({
        top: imgOffset.top + y - 1,
        left: imgOffset.left + x - 1,
        width: width + 1,
        height: height + 1
    });
}
|
||||||
|
/** Hide the #highlighter box again. */
function unhighlight_node(){
    var box = $("#highlighter");
    box.hide();
}
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 7.2 KiB |
|
@ -0,0 +1,15 @@
|
||||||
|
{% extends "main.html" %}
|
||||||
|
|
||||||
|
{% block title %}Tree{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<center>
|
||||||
|
<h2><a href="{{ url_for('scrap') }}">Scrap a page</a></h2></br></br>
|
||||||
|
</center>
|
||||||
|
<center>
|
||||||
|
{% for id, page_title in titles %}
|
||||||
|
<a href="{{ url_for('tree', tree_id=id) }}">{{ page_title }}</a></br>
|
||||||
|
</br>
|
||||||
|
{% endfor %}
|
||||||
|
</center>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,11 @@
|
||||||
|
{% extends "bootstrap/base.html" %}
|
||||||
|
|
||||||
|
{% block scripts %}
|
||||||
|
{{ super() }}
|
||||||
|
<script src='{{ url_for('static', filename='ete.js') }}'></script>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block head %}
|
||||||
|
{{ super() }}
|
||||||
|
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='ete.css') }}">
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,16 @@
|
||||||
|
{% extends "main.html" %}
|
||||||
|
{% block title %}Scrap{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="container">
|
||||||
|
<h1>Scrap a page</h1>
|
||||||
|
|
||||||
|
<form class="form-inline" role="form" action="scrap" method=post>
|
||||||
|
<div class="form-group">
|
||||||
|
<input type="text" class="form-control" name="url" id=url placeholder="URL to scrap">
|
||||||
|
<input type="text" class="form-control" name="depth" id=depth placeholder="Depth">
|
||||||
|
</div>
|
||||||
|
<button type="submit" class="btn btn-default">Scrap</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,31 @@
|
||||||
|
{% extends "main.html" %}
|
||||||
|
|
||||||
|
{% block title %}Tree{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div id="highlighter" style="display: none;"></div>
|
||||||
|
<MAP NAME="map" class="ete_tree_img">
  {# One clickable <AREA> per node, then one per face. highlight_node() in
     ete.js takes (x, y, width, height), so both loops pass only the node
     area: its top-left corner and its size. #}
  {% for x1, y1, x2, y2, nodeid, text, area in nodes %}
  <div class="popup">
    <AREA SHAPE="rect" COORDS="{{ x1 }}, {{ y1 }}, {{ x2 }}, {{ y2 }}"
          onMouseOut='unhighlight_node();'
          onMouseOver='highlight_node({{ area[0] }}, {{ area[1] }}, {{ area[2]-area[0] }}, {{ area[3]-area[1] }});'
          onclick='showPopup({{ nodeid }});'
          href="javascript:void('{{ nodeid }}');">
  </div>
  {% endfor %}
  {% for x1, y1, x2, y2, nodeid, text, area in faces %}
  <div class="popup">
    <AREA SHAPE="rect" COORDS="{{ x1 }}, {{ y1 }}, {{ x2 }}, {{ y2 }}"
          onMouseOut='unhighlight_node();'
          onMouseOver='highlight_node({{ area[0] }}, {{ area[1] }}, {{ area[2]-area[0] }}, {{ area[3]-area[1] }});'
          onclick='showPopup("{{ nodeid }}");'
          href="javascript:void('{{ nodeid }}');">
  </div>
  {% endfor %}
</MAP>
|
||||||
|
<div id="box">
|
||||||
|
<img id="img" class="ete_tree_img" USEMAP="#map" src="data:image/png;base64,{{ base64_img }}">
|
||||||
|
<div style="margin:0px;padding:0px;text-align:left;"><a href="http://etetoolkit.org" style="font-size:7pt;" target="_blank" >Powered by etetoolkit</a></div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Web thing
|
||||||
|
flask
|
||||||
|
flask-bootstrap
|
||||||
|
|
||||||
|
# Backend libs
|
||||||
|
git+https://github.com/viper-framework/har2tree.git
|
||||||
|
git+https://github.com/viper-framework/ScrapySplashWrapper.git
|
||||||
|
|
||||||
|
# Required for the drawing (latest version)
|
||||||
|
git+https://github.com/etetoolkit/ete.git
|
|
@ -0,0 +1,26 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='lookyloo',
|
||||||
|
version='0.1',
|
||||||
|
author='Raphaël Vinot',
|
||||||
|
author_email='raphael.vinot@circl.lu',
|
||||||
|
maintainer='Raphaël Vinot',
|
||||||
|
url='https://github.com/CIRCL/lookyloo',
|
||||||
|
description='Web interface to track the trackers.',
|
||||||
|
packages=['lookyloo'],
|
||||||
|
include_package_data=True,
|
||||||
|
classifiers=[
|
||||||
|
'License :: OSI Approved :: BSD License',
|
||||||
|
'Operating System :: POSIX :: Linux',
|
||||||
|
'Intended Audience :: Science/Research',
|
||||||
|
'Intended Audience :: Telecommunications Industry',
|
||||||
|
'Intended Audience :: Information Technology',
|
||||||
|
'Programming Language :: Python :: 3',
|
||||||
|
'Topic :: Security',
|
||||||
|
'Topic :: Internet',
|
||||||
|
],
|
||||||
|
)
|
Loading…
Reference in New Issue