mirror of https://github.com/CIRCL/lookyloo
Add initial web interface
parent
eb71d7a3eb
commit
f7f1abc9e4
|
@ -1,3 +1,8 @@
|
|||
# Local exclude
|
||||
scraped/
|
||||
*.swp
|
||||
lookyloo/ete3_webserver/webapi.py
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
|
46
README.md
46
README.md
|
@ -1,2 +1,44 @@
|
|||
# lookyloo
|
||||
*Lookyloo* is a web interface that scrapes a website and then displays a tree of the domains calling each other.
|
||||
Lookyloo
|
||||
========
|
||||
|
||||
*Lookyloo* is a web interface that scrapes a website and then displays a
|
||||
tree of domains calling each other.
|
||||
|
||||
|
||||
What is that name?!
|
||||
===================
|
||||
|
||||
|
||||
```
|
||||
1. People who just come to look.
|
||||
2. People who go out of their way to look at people or something often causing crowds and more disruption.
|
||||
3. People who enjoy staring at watching other peoples misfortune. Oftentimes car onlookers to car accidents.
|
||||
Same as Looky Lou; often spelled as Looky-loo (hyphen) or lookylou
|
||||
In L.A. usually the lookyloo's cause more accidents by not paying full attention to what is ahead of them.
|
||||
```
|
||||
|
||||
Source: Urban Dictionary
|
||||
|
||||
|
||||
Implementation details
|
||||
======================
|
||||
|
||||
This code is heavily inspired by https://github.com/etetoolkit/webplugin and adapted to use Flask as the backend.
|
||||
|
||||
Installation of har2tree
|
||||
========================
|
||||
|
||||
The core dependency is ETE Toolkit, which you can install following the guide
|
||||
on the official website: http://etetoolkit.org/download/
|
||||
|
||||
Protip
|
||||
======
|
||||
|
||||
If you like using virtualenv and have `pew` installed you can also do it this way:
|
||||
|
||||
```bash
|
||||
sudo apt-get install python-qt4
|
||||
pip install -r requirements.txt
|
||||
pew toggleglobalsitepackages # PyQt4 is not easily installable in a virtualenv
|
||||
pip install -e .
|
||||
```
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
|
||||
from har2tree import CrawledTree, hostname_treestyle
|
||||
from scrapysplashwrapper import crawl
|
||||
from ete3_webserver import NodeActions, WebTreeHandler
|
||||
|
||||
from flask import Flask, render_template, request
|
||||
from flask_bootstrap import Bootstrap
|
||||
|
||||
from glob import glob
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
Bootstrap(app)
|
||||
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
|
||||
app.debug = True
|
||||
|
||||
HAR_DIR = 'scraped'
|
||||
SPLASH = 'http://127.0.0.1:8050'
|
||||
|
||||
|
||||
def load_tree(report_dir):
    """Build the web tree handler for one scraped report.

    Every ``*.har`` file found in ``HAR_DIR/report_dir`` is handed, in sorted
    order, to ``CrawledTree``. The crawled tree then resolves its parents,
    joins its trees and builds the hostname tree of its root HAR tree.

    :param report_dir: name of a report directory located inside ``HAR_DIR``.
    :return: a ``WebTreeHandler`` wrapping the root hostname tree, created
        with a fresh ``NodeActions`` and the ``hostname_treestyle()`` style.
    """
    har_pattern = os.path.join(HAR_DIR, report_dir, '*.har')
    crawled = CrawledTree(sorted(glob(har_pattern)))
    crawled.find_parents()
    crawled.join_trees()
    crawled.root_hartree.make_hostname_tree()

    node_actions = NodeActions()
    tree_style = hostname_treestyle()
    return WebTreeHandler(crawled.root_hartree.hostname_tree, node_actions, tree_style)
|
||||
|
||||
|
||||
@app.route('/scrap', methods=['GET', 'POST'])
def scrap():
    """Show the scrap form, or crawl the submitted URL and display its tree.

    Without a ``url`` form field the scrap form is rendered. Otherwise the URL
    is crawled through the Splash instance at ``SPLASH``, the ``'har'`` entry
    of every returned item is dumped as JSON into a new directory under
    ``HAR_DIR`` named after the current ISO timestamp, and the tree view of
    the last report (``tree(-1)``) is returned.

    If the crawl returns nothing, the form is rendered again instead of
    creating an empty report directory.

    :return: the rendered ``scrap.html`` form or the rendered tree page.
    """
    url = request.form.get('url')
    if not url:
        return render_template('scrap.html')

    # The form delivers the depth as text (possibly empty or missing).
    # NOTE(review): falling back to a depth of 1 is an assumption about what
    # crawl() expects by default -- confirm against ScrapySplashWrapper.
    try:
        depth = int(request.form.get('depth') or 1)
    except ValueError:
        depth = 1

    items = crawl(SPLASH, url, depth)
    if not items:
        # Nothing was captured: there is no report to store or display.
        return render_template('scrap.html')

    # Zero-pad the file names so that sorting them keeps the crawl order.
    width = len(str(len(items)))
    dirpath = os.path.join(HAR_DIR, datetime.now().isoformat())
    os.makedirs(dirpath)
    for i, item in enumerate(items, 1):
        filename = '{0:0{width}}.har'.format(i, width=width)
        with open(os.path.join(dirpath, filename), 'w') as f:
            json.dump(item['har'], f)

    # presumably the timestamp-named directory just created sorts last in
    # HAR_DIR, so -1 selects it -- holds while only such directories exist.
    return tree(-1)
|
||||
|
||||
|
||||
@app.route('/tree/<int:tree_id>', methods=['GET'])
def tree(tree_id):
    """Render the tree page of one stored report.

    :param tree_id: position of the report directory in the sorted listing
        of ``HAR_DIR``.
    :return: ``tree.html`` rendered with the nodes, faces and image returned
        by the handler's ``redraw()``.
    """
    reports = sorted(os.listdir(HAR_DIR))
    handler = load_tree(reports[tree_id])
    nodes, faces, encoded_img = handler.redraw()
    return render_template('tree.html', nodes=nodes, faces=faces, base64_img=encoded_img)
|
||||
|
||||
|
||||
@app.route('/', methods=['GET'])
def index():
    """List every stored report that contains at least one HAR file.

    For each report directory in ``HAR_DIR`` (sorted), the first HAR file is
    loaded and the title of its first page is used as the link label.

    The id paired with each title is the directory's position in
    ``sorted(os.listdir(HAR_DIR))``, the same listing ``tree()`` indexes.
    Directories without HAR files are skipped but still count towards the
    position, so the links stay aligned with the reports they point to.

    :return: ``index.html`` rendered with ``titles``, a list of
        ``(tree_id, page_title)`` tuples.
    """
    titles = []
    for i, report_dir in enumerate(sorted(os.listdir(HAR_DIR))):
        har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
        if not har_files:
            continue
        with open(har_files[0], 'r') as f:
            har = json.load(f)
        titles.append((i, har['log']['pages'][0]['title']))

    return render_template('index.html', titles=titles)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(port=5001)
|
|
@ -0,0 +1,3 @@
|
|||
from .tree_handler import WebTreeHandler, NodeActions
|
||||
|
||||
__all__ = ['WebTreeHandler', 'NodeActions']
|
|
@ -0,0 +1,90 @@
|
|||
import time
|
||||
import string
|
||||
import random
|
||||
# import logging as log
|
||||
from ete3 import Tree # , TreeStyle
|
||||
from ete3.parser.newick import NewickError
|
||||
|
||||
|
||||
def timeit(f):
    """Decorate *f* so that every call prints how long it took.

    The wrapper forwards all positional and keyword arguments to *f*, prints
    the elapsed wall-clock time (seconds, three decimals) together with
    ``f.__name__``, and returns whatever *f* returned.

    The wrapper keeps the metadata of *f* (``__name__``, ``__doc__``, ...)
    so decorated functions remain identifiable in logs and tracebacks.

    :param f: the callable to time.
    :return: the timing wrapper around *f*.
    """
    # Imported locally so the decorator does not depend on a module-level
    # import being present.
    import functools

    @functools.wraps(f)
    def a_wrapper_accepting_arguments(*args, **kargs):
        t1 = time.time()
        r = f(*args, **kargs)
        print(" %0.3f secs: %s" % (time.time() - t1, f.__name__))
        return r
    return a_wrapper_accepting_arguments
|
||||
|
||||
|
||||
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier made of *size* characters from *chars*.

    Characters are drawn one at a time with ``random.choice``, so the result
    is not suitable as a security token.

    :param size: number of characters to generate.
    :param chars: sequence of candidate characters (uppercase ASCII letters
        and digits by default).
    :return: the generated string; empty when *size* is 0.
    """
    picked = [random.choice(chars) for _ in range(size)]
    return ''.join(picked)
|
||||
|
||||
|
||||
class WebTreeHandler(object):
    """Wrap an ETE ``Tree`` so the web interface can draw and query it."""

    def __init__(self, newick, actions, style):
        """Store the tree, attach actions and style, and number the nodes.

        :param newick: either a ``Tree`` instance, used as is, or a value
            passed to ``Tree()``; when that raises ``NewickError`` it is
            parsed again with ``format=1``.
        :param actions: object stored as ``tree.actions``.
        :param style: object stored as ``tree.tree_style``.
        """
        if not isinstance(newick, Tree):
            try:
                newick = Tree(newick)
            except NewickError:
                newick = Tree(newick, format=1)
        self.tree = newick

        self.tree.actions = actions
        self.tree.tree_style = style

        # Initialize node internal IDs (position in a preorder traversal)
        for nid, node in enumerate(self.tree.traverse('preorder')):
            node._nid = nid

    @timeit
    def redraw(self):
        """Render the tree and return what the template needs to display it.

        :return: a ``(nodes, faces, base64)`` tuple, where ``nodes`` and
            ``faces`` come from :meth:`get_html_map` and ``base64`` is the
            decoded image data (presumably a base64-encoded PNG -- confirm
            against ete3's ``render``).
        """
        rendered, img_map = self.tree.render("%%return.PNG", tree_style=self.tree.tree_style)
        nodes, faces = self.get_html_map(img_map)
        return nodes, faces, rendered.data().decode()

    def get_html_map(self, img_map):
        """Convert the image map into two lists of clickable regions.

        Each ``(x1, y1, x2, y2, nodeid, text)`` entry found under the
        ``"nodes"`` and ``"faces"`` keys is extended with the matching value
        of ``img_map["node_areas"]``, or ``[0, 0, 0, 0]`` when the node id
        has no area.

        :param img_map: mapping with optional ``"nodes"`` / ``"faces"``
            entries and a ``"node_areas"`` mapping keyed by integer node id.
        :return: a ``(nodes, faces)`` tuple of lists.
        """
        def regions(key):
            collected = []
            # A missing or empty entry simply yields no region.
            for x1, y1, x2, y2, nodeid, text in img_map.get(key) or ():
                collected.append([x1, y1, x2, y2, nodeid, text,
                                  img_map["node_areas"].get(int(nodeid), [0, 0, 0, 0])])
            return collected

        nodes = regions("nodes")
        faces = regions("faces")
        return nodes, faces

    def get_avail_actions(self, nodeid):
        """List the actions whose ``show_fn`` accepts the given node.

        :param nodeid: internal id (``_nid``) of the node, convertible to int.
        :return: a list of ``[aindex, aname]`` pairs.
        """
        target = self.tree.search_nodes(_nid=int(nodeid))[0]
        return [[aindex, aname]
                for aindex, aname, show_fn, _run_fn in self.tree.actions
                if show_fn(target)]

    def run_action(self, aindex, nodeid):
        """Run the registered action ``aindex`` on the given node.

        :param aindex: key of the action in ``tree.actions.actions``.
        :param nodeid: internal id (``_nid``) of the node, convertible to int.
        :return: whatever the action's run function returns when called with
            the tree and the target node.
        """
        target = self.tree.search_nodes(_nid=int(nodeid))[0]
        # Entries are stored as [name, show_fn, run_fn].
        action = self.tree.actions.actions[aindex][2]
        return action(self.tree, target)
|
||||
|
||||
|
||||
class NodeActions(object):
    """Registry of the actions that can be offered on tree nodes.

    Actions live in ``self.actions``, a dict mapping an action id to a
    ``[action_name, show_fn, run_fn]`` list.
    """

    def __init__(self):
        """Start with no registered action."""
        self.actions = {}

    def __iter__(self):
        """Yield one ``(aindex, aname, show_fn, run_fn)`` tuple per action."""
        for aindex, entry in self.actions.items():
            aname, show_fn, run_fn = entry
            yield aindex, aname, show_fn, run_fn

    def __str__(self):
        """Return one ``"aindex: aname, show_fn, run_fn"`` line per action."""
        return '\n'.join("%s: %s, %s, %s" % row for row in self)

    def clear_default_actions(self):
        """Drop every registered action by rebinding to a new, empty dict."""
        self.actions = dict()

    def add_action(self, action_name, show_fn, run_fn):
        """Register an action under a generated ``"act_"``-prefixed id.

        :param action_name: name stored for the action.
        :param show_fn: callable stored as the action's show function.
        :param run_fn: callable stored as the action's run function.
        """
        key = "act_" + id_generator()
        self.actions[key] = [action_name, show_fn, run_fn]
|
|
@ -0,0 +1,8 @@
|
|||
#highlighter {
|
||||
position: absolute;
|
||||
visibility: visible;
|
||||
z-index:100;
|
||||
top:0; left:0;
|
||||
width: 70px; height: 70px;
|
||||
border: 2px solid indianred;
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
function highlight_node(x, y, width, height){
    // Show the #highlighter box over a region of the tree image.
    // x, y are the region's coordinates relative to the #img element;
    // width and height are its size. The box is shifted by one pixel up and
    // left and grown by one pixel in each dimension.
    var origin = $('#img').offset();

    $("#highlighter").show().css({
        "top": origin.top+y-1,
        "left": origin.left+x-1,
        "width": width+1,
        "height": height+1
    });
}
|
||||
function unhighlight_node(){
    // Hide the #highlighter box again.
    var box = $("#highlighter");
    box.hide();
}
|
||||
|
Binary file not shown.
After Width: | Height: | Size: 7.2 KiB |
|
@ -0,0 +1,15 @@
|
|||
{% extends "main.html" %}
|
||||
|
||||
{% block title %}Tree{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<center>
|
||||
<h2><a href="{{ url_for('scrap') }}">Scrap a page</a></h2></br></br>
|
||||
</center>
|
||||
<center>
|
||||
{% for id, page_title in titles %}
|
||||
<a href="{{ url_for('tree', tree_id=id) }}">{{ page_title }}</a></br>
|
||||
</br>
|
||||
{% endfor %}
|
||||
</center>
|
||||
{% endblock %}
|
|
@ -0,0 +1,11 @@
|
|||
{% extends "bootstrap/base.html" %}
|
||||
|
||||
{% block scripts %}
|
||||
{{ super() }}
|
||||
<script src='{{ url_for('static', filename='ete.js') }}'></script>
|
||||
{% endblock %}
|
||||
|
||||
{% block head %}
|
||||
{{ super() }}
|
||||
<link rel="stylesheet" type="text/css" href="{{ url_for('static',filename='ete.css') }}">
|
||||
{% endblock %}
|
|
@ -0,0 +1,16 @@
|
|||
{% extends "main.html" %}
|
||||
{% block title %}Scrap{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
<h1>Scrap a page</h1>
|
||||
|
||||
<form class="form-inline" role="form" action="scrap" method=post>
|
||||
<div class="form-group">
|
||||
<input type="text" class="form-control" name="url" id=url placeholder="URL to scrap">
|
||||
<input type="text" class="form-control" name="depth" id=depth placeholder="Depth">
|
||||
</div>
|
||||
<button type="submit" class="btn btn-default">Scrap</button>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
|
@ -0,0 +1,31 @@
|
|||
{% extends "main.html" %}
|
||||
|
||||
{% block title %}Tree{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div id="highlighter" style="display: none;"></div>
|
||||
<MAP NAME="map" class="ete_tree_img">
|
||||
{% for x1, y1, x2, y2, nodeid, text, area in nodes %}
  {# highlight_node(x, y, width, height) takes exactly four numbers: the
     area's top-left corner and its size. Passing nodeid and text first
     shifted every argument and mispositioned the highlighter. #}
  <div class="popup">
    <AREA SHAPE="rect" COORDS="{{ x1 }}, {{ y1 }}, {{ x2 }}, {{ y2 }}"
          onMouseOut='unhighlight_node();'
          onMouseOver='highlight_node({{ area[0] }}, {{ area[1] }}, {{ area[2]-area[0] }}, {{ area[3]-area[1] }});'
          onclick='showPopup({{ nodeid }});'
          href="javascript:void('{{ nodeid }}');">
  </div>
{% endfor %}
|
||||
{% for x1, y1, x2, y2, nodeid, text, area in faces %}
|
||||
<div class="popup">
|
||||
<AREA SHAPE="rect" COORDS="{{ x1 }}, {{ y1 }}, {{ x2 }}, {{ y2 }}"
|
||||
onMouseOut='unhighlight_node();'
|
||||
onMouseOver='highlight_node({{ area[0] }}, {{ area[1] }}, {{ area[2]-area[0] }}, {{ area[3]-area[1] }});'
|
||||
onclick='showPopup("{{ nodeid }}");'
|
||||
href="javascript:void('{{ nodeid }}');">
|
||||
</div>
|
||||
{% endfor %}
|
||||
</MAP>
|
||||
<div id="box">
|
||||
<img id="img" class="ete_tree_img" USEMAP="#map" src="data:image/png;base64,{{ base64_img }}">
|
||||
<div style="margin:0px;padding:0px;text-align:left;"><a href="http://etetoolkit.org" style="font-size:7pt;" target="_blank" >Powered by etetoolkit</a></div>
|
||||
</div>
|
||||
{% endblock %}
|
|
@ -0,0 +1,10 @@
|
|||
# Web thing
|
||||
flask
|
||||
flask-bootstrap
|
||||
|
||||
# Backend libs
|
||||
git+https://github.com/viper-framework/har2tree.git
|
||||
git+https://github.com/viper-framework/ScrapySplashWrapper.git
|
||||
|
||||
# Required for the drawing (latest version)
|
||||
git+https://github.com/etetoolkit/ete.git
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from setuptools import setup
|
||||
|
||||
|
||||
setup(
|
||||
name='lookyloo',
|
||||
version='0.1',
|
||||
author='Raphaël Vinot',
|
||||
author_email='raphael.vinot@circl.lu',
|
||||
maintainer='Raphaël Vinot',
|
||||
url='https://github.com/CIRCL/lookyloo',
|
||||
description='Web interface to track the trackers.',
|
||||
packages=['lookyloo'],
|
||||
include_package_data=True,
|
||||
classifiers=[
|
||||
'License :: OSI Approved :: BSD License',
|
||||
'Operating System :: POSIX :: Linux',
|
||||
'Intended Audience :: Science/Research',
|
||||
'Intended Audience :: Telecommunications Industry',
|
||||
'Intended Audience :: Information Technology',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Topic :: Security',
|
||||
'Topic :: Internet',
|
||||
],
|
||||
)
|
Loading…
Reference in New Issue