Add initial web interface

pull/1/head
Raphaël Vinot 2017-07-23 19:56:51 +02:00
parent eb71d7a3eb
commit f7f1abc9e4
14 changed files with 363 additions and 2 deletions

5
.gitignore vendored
View File

@ -1,3 +1,8 @@
# Local exclude
scraped/
*.swp
lookyloo/ete3_webserver/webapi.py
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

View File

@ -1,2 +1,44 @@
# lookyloo
*Lookyloo* is a web interface allowing to scrape a website and then displays a tree of domains calling each other.
Lookyloo
========
*Lookyloo* is a web interface that scrapes a website and then displays a
tree of the domains calling each other.
What is that name?!
===================
```
1. People who just come to look.
2. People who go out of their way to look at people or something often causing crowds and more disruption.
3. People who enjoy staring at watching other peoples misfortune. Oftentimes car onlookers to car accidents.
Same as Looky Lou; often spelled as Looky-loo (hyphen) or lookylou
In L.A. usually the lookyloo's cause more accidents by not paying full attention to what is ahead of them.
```
Source: Urban Dictionary
Implementation details
======================
This code is heavily inspired by https://github.com/etetoolkit/webplugin and adapted to use Flask as the backend.
Installation of har2tree
========================
The core dependency is ETE Toolkit, which you can install by following the guide
on the official website: http://etetoolkit.org/download/
Protip
======
If you like using virtualenv and have `pew` installed you can also do it this way:
```bash
sudo apt-get install python-qt4
pip install -r requirements.txt
pew toggleglobalsitepackages # PyQt4 is not easily installable in a virtualenv
pip install -e .
```

85
lookyloo/__init__.py Normal file
View File

@ -0,0 +1,85 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
from har2tree import CrawledTree, hostname_treestyle
from scrapysplashwrapper import crawl
from ete3_webserver import NodeActions, WebTreeHandler
from flask import Flask, render_template, request
from flask_bootstrap import Bootstrap
from glob import glob
import os
from datetime import datetime
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Attach the Flask-Bootstrap extension to the application.
Bootstrap(app)
# NOTE(review): presumably makes Flask-Bootstrap serve its bundled assets
# instead of CDN links -- confirm against the Flask-Bootstrap documentation.
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
# NOTE(review): debug mode is switched on unconditionally; confirm this is
# never used for a deployment reachable by untrusted users.
app.debug = True
# Directory (relative to the working directory) holding one sub-directory of
# *.har files per scrape.
HAR_DIR = 'scraped'
# Base URL handed to crawl() as its first argument.
SPLASH = 'http://127.0.0.1:8050'
def load_tree(report_dir):
    """Build the web tree handler for one stored scrape.

    Collects every ``*.har`` file found in ``HAR_DIR/report_dir`` (sorted by
    name), feeds them to ``CrawledTree``, and wraps the resulting hostname
    tree in a ``WebTreeHandler`` with fresh ``NodeActions`` and the hostname
    tree style.
    """
    pattern = os.path.join(HAR_DIR, report_dir, '*.har')
    crawled = CrawledTree(sorted(glob(pattern)))
    crawled.find_parents()
    crawled.join_trees()
    crawled.root_hartree.make_hostname_tree()
    node_actions = NodeActions()
    tree_style = hostname_treestyle()
    return WebTreeHandler(crawled.root_hartree.hostname_tree, node_actions, tree_style)
@app.route('/scrap', methods=['GET', 'POST'])
def scrap():
    """Show the scrape form, or run a scrape and display its tree.

    When the submitted form carries a non-empty ``url``, the page is crawled
    through ``crawl(SPLASH, url, depth)``. Each returned item's ``'har'``
    entry is written as JSON to a zero-padded, numbered ``.har`` file inside
    a fresh ``HAR_DIR/<ISO timestamp>`` directory, and the last tree of the
    sorted listing (``tree(-1)``) is rendered. Without a ``url`` -- or when
    the crawl yields nothing -- the form is rendered.
    """
    url = request.form.get('url')
    if not url:
        return render_template('scrap.html')
    depth = request.form.get('depth')
    items = crawl(SPLASH, url, depth)
    if not items:
        # Nothing was captured: len(items) would fail on None, and an empty
        # list would leave an empty report directory behind. Show the form
        # again instead.
        return render_template('scrap.html')
    # Zero-pad the file names so the name-sorted listing used when loading
    # the tree matches the order in which the items were written.
    width = len(str(len(items)))
    dirpath = os.path.join(HAR_DIR, datetime.now().isoformat())
    os.makedirs(dirpath)
    for i, item in enumerate(items, start=1):
        filename = '{0:0{width}}.har'.format(i, width=width)
        with open(os.path.join(dirpath, filename), 'w') as f:
            json.dump(item['har'], f)
    return tree(-1)
@app.route('/tree/<int:tree_id>', methods=['GET'])
def tree(tree_id):
    """Render the tree page for the ``tree_id``-th report directory.

    ``tree_id`` indexes the name-sorted listing of ``HAR_DIR``.
    """
    report_dirs = sorted(os.listdir(HAR_DIR))
    handler = load_tree(report_dirs[tree_id])
    nodes, faces, encoded_img = handler.redraw()
    return render_template('tree.html', nodes=nodes, faces=faces, base64_img=encoded_img)
@app.route('/', methods=['GET'])
def index():
    """List the stored scrapes, linking each one to its tree page.

    Every sub-directory of ``HAR_DIR`` containing at least one ``*.har`` file
    contributes a ``(tree_id, title)`` pair, where the title is read from the
    first page entry of the first HAR file (sorted by name).
    """
    titles = []
    # tree() resolves tree_id against the complete name-sorted listing of
    # HAR_DIR, so the id must be the position in that same listing:
    # directories without HAR files are skipped but still use up their index.
    for tree_id, report_dir in enumerate(sorted(os.listdir(HAR_DIR))):
        har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
        if not har_files:
            continue
        with open(har_files[0], 'r') as f:
            first_har = json.load(f)
        titles.append((tree_id, first_har['log']['pages'][0]['title']))
    return render_template('index.html', titles=titles)
# Start the Flask application on port 5001 when this file is run directly.
if __name__ == '__main__':
    app.run(port=5001)

View File

@ -0,0 +1,3 @@
from .tree_handler import WebTreeHandler, NodeActions
__all__ = ['WebTreeHandler', 'NodeActions']

View File

@ -0,0 +1,90 @@
import time
import string
import random
# import logging as log
from ete3 import Tree # , TreeStyle
from ete3.parser.newick import NewickError
def timeit(f):
    """Decorator printing how long each call to *f* took.

    The wrapper forwards all positional and keyword arguments to *f*, prints
    the elapsed time in seconds together with ``f.__name__``, and returns
    *f*'s result unchanged.
    """
    # Imported here so the decorator does not depend on the module's import
    # block providing functools.
    from functools import wraps

    # wraps() keeps the decorated function's name and docstring visible on
    # the wrapper.
    @wraps(f)
    def a_wrapper_accepting_arguments(*args, **kargs):
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        t1 = time.perf_counter()
        r = f(*args, **kargs)
        print(" %0.3f secs: %s" % (time.perf_counter() - t1, f.__name__))
        return r
    return a_wrapper_accepting_arguments
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters drawn from ``chars``.

    By default the alphabet is the ASCII uppercase letters plus the digits.
    """
    picked = [random.choice(chars) for _ in range(size)]
    return ''.join(picked)
class WebTreeHandler(object):
    """Wrap an ete3 ``Tree`` for rendering and node actions in the web UI.

    The tree is stored as ``self.tree``; the given ``actions`` and ``style``
    are attached to it as ``tree.actions`` and ``tree.tree_style``, and every
    node receives a ``_nid`` attribute holding its preorder position.
    """

    def __init__(self, newick, actions, style):
        """Accept either a ready ``Tree`` or something ``Tree()`` can parse.

        When parsing with the default format raises ``NewickError``, parsing
        is retried with ``format=1``.
        """
        if not isinstance(newick, Tree):
            try:
                newick = Tree(newick)
            except NewickError:
                newick = Tree(newick, format=1)
        self.tree = newick
        self.tree.actions = actions
        self.tree.tree_style = style
        # Give each node an internal ID; the action methods look nodes up
        # through search_nodes(_nid=...).
        for position, node in enumerate(self.tree.traverse('preorder')):
            node._nid = position

    @timeit
    def redraw(self):
        """Render the tree and return ``(nodes, faces, base64)``.

        ``nodes`` and ``faces`` come from :meth:`get_html_map`; ``base64`` is
        the decoded text of the image data returned by ``render``.
        """
        rendered_img, img_map = self.tree.render("%%return.PNG", tree_style=self.tree.tree_style)
        nodes, faces = self.get_html_map(img_map)
        return nodes, faces, rendered_img.data().decode()

    def get_html_map(self, img_map):
        """Return the ``nodes`` and ``faces`` entries of ``img_map`` with areas.

        Each entry ``x1, y1, x2, y2, nodeid, text`` is extended with the
        matching value from ``img_map["node_areas"]`` (keyed by
        ``int(nodeid)``), or ``[0, 0, 0, 0]`` when there is none. A missing or
        empty section yields an empty list.
        """
        def with_areas(section):
            entries = []
            if img_map.get(section):
                for x1, y1, x2, y2, nodeid, text in img_map[section]:
                    area = img_map["node_areas"].get(int(nodeid), [0, 0, 0, 0])
                    entries.append([x1, y1, x2, y2, nodeid, text, area])
            return entries

        return with_areas("nodes"), with_areas("faces")

    def get_avail_actions(self, nodeid):
        """Return ``[aindex, aname]`` for every action whose ``show_fn`` accepts the node."""
        target = self.tree.search_nodes(_nid=int(nodeid))[0]
        return [[aindex, aname]
                for aindex, aname, show_fn, _run_fn in self.tree.actions
                if show_fn(target)]

    def run_action(self, aindex, nodeid):
        """Run the action stored under ``aindex`` on the node and return its result."""
        target = self.tree.search_nodes(_nid=int(nodeid))[0]
        # Stored entries are [action_name, show_fn, run_fn].
        entry = self.tree.actions.actions[aindex]
        run_fn = entry[2]
        return run_fn(self.tree, target)
class NodeActions(object):
    """Registry of node actions, keyed by a generated action index.

    ``self.actions`` maps each index to ``[action_name, show_fn, run_fn]``.
    """

    def __init__(self):
        self.actions = {}

    def __iter__(self):
        """Yield ``(aindex, aname, show_fn, run_fn)`` for every registered action."""
        for key, entry in self.actions.items():
            label, show_fn, run_fn = entry
            yield (key, label, show_fn, run_fn)

    def __str__(self):
        """Return one ``index: name, show_fn, run_fn`` line per action."""
        return '\n'.join("%s: %s, %s, %s" % row for row in self)

    def clear_default_actions(self):
        """Drop every registered action."""
        self.actions = {}

    def add_action(self, action_name, show_fn, run_fn):
        """Register an action under a fresh ``act_``-prefixed random index."""
        key = "act_" + id_generator()
        self.actions[key] = [action_name, show_fn, run_fn]

8
lookyloo/static/ete.css Normal file
View File

@ -0,0 +1,8 @@
/* Overlay box that highlight_node() in ete.js moves and resizes over the
   tree image; the width/height below are only the initial values. */
#highlighter {
    position: absolute;
    visibility: visible;
    z-index:100;
    top:0; left:0;
    width: 70px; height: 70px;
    border: 2px solid indianred;
}

19
lookyloo/static/ete.js Normal file
View File

@ -0,0 +1,19 @@
/**
 * Show the #highlighter overlay over a region of the tree image.
 *
 * x and y are added to the page offset of the #img element to position the
 * overlay; width and height give the size of the region.
 */
function highlight_node(x, y, width, height){
    var origin = $('#img').offset();
    var box = $("#highlighter");
    box.show();
    box.css({
        top: origin.top + y - 1,
        left: origin.left + x - 1,
        width: width + 1,
        height: height + 1
    });
}
// Hide the #highlighter overlay again.
function unhighlight_node(){
    // console.log("unhighlight");
    $("#highlighter").hide();
}

BIN
lookyloo/static/loader.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

View File

@ -0,0 +1,15 @@
{% extends "main.html" %}
{% block title %}Tree{% endblock %}
{# Landing page: a link to the scrape form, then one link per entry of `titles`. #}
{% block content %}
<center>
  <h2><a href="{{ url_for('scrap') }}">Scrap a page</a></h2><br><br>
</center>
<center>
  {% for id, page_title in titles %}
  <a href="{{ url_for('tree', tree_id=id) }}">{{ page_title }}</a><br>
  <br>
  {% endfor %}
</center>
{% endblock %}

View File

@ -0,0 +1,11 @@
{% extends "bootstrap/base.html" %}
{# Shared layout: adds the static ete.js script and ete.css stylesheet on top of bootstrap/base.html. #}
{% block scripts %}
{{ super() }}
<script src='{{ url_for('static', filename='ete.js') }}'></script>
{% endblock %}
{% block head %}
{{ super() }}
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='ete.css') }}">
{% endblock %}

View File

@ -0,0 +1,16 @@
{% extends "main.html" %}
{% block title %}Scrap{% endblock %}
{# Scrape form: posts the `url` and `depth` fields to the relative URL "scrap". #}
{% block content %}
<div class="container">
  <h1>Scrap a page</h1>
  <form class="form-inline" role="form" action="scrap" method=post>
    <div class="form-group">
      <input type="text" class="form-control" name="url" id=url placeholder="URL to scrap">
      <input type="text" class="form-control" name="depth" id=depth placeholder="Depth">
    </div>
    <button type="submit" class="btn btn-default">Scrap</button>
  </form>
</div>
{% endblock %}

View File

@ -0,0 +1,31 @@
{% extends "main.html" %}
{% block title %}Tree{% endblock %}
{# Tree page: the rendered image plus an image map whose areas highlight the hovered node or face through ete.js. #}
{% block content %}
<div id="highlighter" style="display: none;"></div>
<MAP NAME="map" class="ete_tree_img">
  {% for x1, y1, x2, y2, nodeid, text, area in nodes %}
  <div class="popup">
    {# highlight_node(x, y, width, height) takes only the area rectangle, same as in the faces loop. #}
    <AREA SHAPE="rect" COORDS="{{ x1 }}, {{ y1 }}, {{ x2 }}, {{ y2 }}"
      onMouseOut='unhighlight_node();'
      onMouseOver='highlight_node({{ area[0] }}, {{ area[1] }}, {{ area[2]-area[0] }}, {{ area[3]-area[1] }});'
      onclick='showPopup({{ nodeid }});'
      href="javascript:void('{{ nodeid }}');">
  </div>
  {% endfor %}
  {% for x1, y1, x2, y2, nodeid, text, area in faces %}
  <div class="popup">
    <AREA SHAPE="rect" COORDS="{{ x1 }}, {{ y1 }}, {{ x2 }}, {{ y2 }}"
      onMouseOut='unhighlight_node();'
      onMouseOver='highlight_node({{ area[0] }}, {{ area[1] }}, {{ area[2]-area[0] }}, {{ area[3]-area[1] }});'
      onclick='showPopup("{{ nodeid }}");'
      href="javascript:void('{{ nodeid }}');">
  </div>
  {% endfor %}
</MAP>
<div id="box">
  <img id="img" class="ete_tree_img" USEMAP="#map" src="data:image/png;base64,{{ base64_img }}">
  <div style="margin:0px;padding:0px;text-align:left;"><a href="http://etetoolkit.org" style="font-size:7pt;" target="_blank" >Powered by etetoolkit</a></div>
</div>
{% endblock %}

10
requirements.txt Normal file
View File

@ -0,0 +1,10 @@
# Web thing
flask
flask-bootstrap
# Backend libs
git+https://github.com/viper-framework/har2tree.git
git+https://github.com/viper-framework/ScrapySplashWrapper.git
# Required for the drawing (latest version)
git+https://github.com/etetoolkit/ete.git

26
setup.py Normal file
View File

@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from setuptools import setup
# Trove classifiers published with the package metadata.
CLASSIFIERS = [
    'License :: OSI Approved :: BSD License',
    'Operating System :: POSIX :: Linux',
    'Intended Audience :: Science/Research',
    'Intended Audience :: Telecommunications Industry',
    'Intended Audience :: Information Technology',
    'Programming Language :: Python :: 3',
    'Topic :: Security',
    'Topic :: Internet',
]

# Package metadata and contents handed to setuptools.
setup(
    name='lookyloo',
    version='0.1',
    description='Web interface to track the trackers.',
    url='https://github.com/CIRCL/lookyloo',
    author='Raphaël Vinot',
    author_email='raphael.vinot@circl.lu',
    maintainer='Raphaël Vinot',
    packages=['lookyloo'],
    include_package_data=True,
    classifiers=CLASSIFIERS,
)