Merge branch 'index_reorg'

pull/67/head
Raphaël Vinot 2020-02-10 11:22:03 +01:00
commit d3a38edb3e
9 changed files with 129 additions and 68 deletions

View File

@ -30,7 +30,7 @@ import logging
from pysanejs import SaneJS
from scrapysplashwrapper import crawl
from har2tree import CrawledTree, Har2TreeError
from har2tree import CrawledTree, Har2TreeError, HarFile
class Lookyloo():
@ -72,12 +72,13 @@ class Lookyloo():
return
with (report_dir / 'uuid').open() as f:
uuid = f.read().strip()
with har_files[0].open() as f:
j = json.load(f)
title = j['log']['pages'][0]['title']
if not title:
title = '!! No title found !! '
cache = {'uuid': uuid, 'title': title}
har = HarFile(har_files[0])
cache: Dict[str, Union[str, int]] = {'uuid': uuid,
'title': har.initial_title,
'timestamp': har.initial_start_time,
'url': har.first_url,
'redirects': json.dumps(har.initial_redirects)}
if (report_dir / 'no_index').exists(): # If the folders claims anonymity
cache['no_index'] = 1
if uuid and not self.redis.exists(str(report_dir)):
@ -87,7 +88,9 @@ class Lookyloo():
def report_cache(self, report_dir: Union[str, Path]) -> Dict:
    """Return the cached metadata stored in redis for one report directory.

    Args:
        report_dir: Directory of the capture, as a ``Path`` or a string.
            A ``Path`` is converted with ``str()`` before being used as
            the redis hash key.

    Returns:
        The hash fetched with ``hgetall``. When the hash has a
        ``'redirects'`` field, its JSON-encoded value is decoded before
        returning. The mapping is returned unchanged (possibly empty)
        when that field is absent.
    """
    if isinstance(report_dir, Path):
        report_dir = str(report_dir)
    cached = self.redis.hgetall(report_dir)
    # An unknown key gives an empty mapping, so only decode the field when
    # it is really there instead of failing with a KeyError; callers can
    # still test the result for emptiness.
    if 'redirects' in cached:
        cached['redirects'] = json.loads(cached['redirects'])
    return cached
def _init_existing_dumps(self) -> None:
for report_dir in self.report_dirs:

84
poetry.lock generated
View File

@ -80,7 +80,7 @@ description = "Foreign Function Interface for Python calling C code."
name = "cffi"
optional = false
python-versions = "*"
version = "1.13.2"
version = "1.14.0"
[package.dependencies]
pycparser = "*"
@ -199,7 +199,7 @@ description = ""
name = "har2tree"
optional = false
python-versions = "^3.6"
version = "0.1.0"
version = "1.0"
[package.dependencies]
beautifulsoup4 = "^4.8.2"
@ -208,7 +208,7 @@ lxml = "^4.4.2"
six = "^1.14.0"
[package.source]
reference = "12c88d6298e52a8458e220b6910597cb5539daa9"
reference = "ff0f6294728902e7e36c294e5b51ac51ba894a03"
type = "git"
url = "https://github.com/viper-framework/har2tree.git"
[[package]]
@ -431,13 +431,13 @@ description = ""
name = "pysanejs"
optional = false
python-versions = "^3.6"
version = "0.1.0"
version = "1.0"
[package.dependencies]
requests = "^2.22.0"
[package.source]
reference = "bdc091fbae7019c39b47a149b12f8ac032eda2a3"
reference = "3ea143f44d37ab701c70ffb38408528ddb4e2b6e"
type = "git"
url = "https://github.com/CIRCL/PySaneJS.git"
[[package]]
@ -529,14 +529,14 @@ description = ""
name = "scrapysplashwrapper"
optional = false
python-versions = "^3.6"
version = "0.1.0"
version = "1.0"
[package.dependencies]
scrapy = "^1.8.0"
scrapy-splash = "^0.7.2"
[package.source]
reference = "2bbc9c1dd405993bc775e62447fac3355dfbde74"
reference = "d781ff5867504f50ce9411fc7cad1a653dd2a02d"
type = "git"
url = "https://github.com/viper-framework/ScrapySplashWrapper.git"
[[package]]
@ -650,12 +650,11 @@ category = "main"
description = "The comprehensive WSGI web application library."
name = "werkzeug"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
version = "0.16.1"
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
version = "1.0.0"
[package.extras]
dev = ["pytest", "coverage", "tox", "sphinx", "pallets-sphinx-themes", "sphinx-issues"]
termcolor = ["termcolor"]
watchdog = ["watchdog"]
[[package]]
@ -726,39 +725,34 @@ certifi = [
{file = "certifi-2019.11.28.tar.gz", hash = "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f"},
]
cffi = [
{file = "cffi-1.13.2-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:3c9fff570f13480b201e9ab69453108f6d98244a7f495e91b6c654a47486ba43"},
{file = "cffi-1.13.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:2c5e309ec482556397cb21ede0350c5e82f0eb2621de04b2633588d118da4396"},
{file = "cffi-1.13.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:19db0cdd6e516f13329cba4903368bff9bb5a9331d3410b1b448daaadc495e54"},
{file = "cffi-1.13.2-cp27-cp27m-win32.whl", hash = "sha256:5c4fae4e9cdd18c82ba3a134be256e98dc0596af1e7285a3d2602c97dcfa5159"},
{file = "cffi-1.13.2-cp27-cp27m-win_amd64.whl", hash = "sha256:32a262e2b90ffcfdd97c7a5e24a6012a43c61f1f5a57789ad80af1d26c6acd97"},
{file = "cffi-1.13.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:4a43c91840bda5f55249413037b7a9b79c90b1184ed504883b72c4df70778579"},
{file = "cffi-1.13.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8169cf44dd8f9071b2b9248c35fc35e8677451c52f795daa2bb4643f32a540bc"},
{file = "cffi-1.13.2-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:71a608532ab3bd26223c8d841dde43f3516aa5d2bf37b50ac410bb5e99053e8f"},
{file = "cffi-1.13.2-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:7f627141a26b551bdebbc4855c1157feeef18241b4b8366ed22a5c7d672ef858"},
{file = "cffi-1.13.2-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:0b49274afc941c626b605fb59b59c3485c17dc776dc3cc7cc14aca74cc19cc42"},
{file = "cffi-1.13.2-cp34-cp34m-win32.whl", hash = "sha256:4424e42199e86b21fc4db83bd76909a6fc2a2aefb352cb5414833c030f6ed71b"},
{file = "cffi-1.13.2-cp34-cp34m-win_amd64.whl", hash = "sha256:7d4751da932caaec419d514eaa4215eaf14b612cff66398dd51129ac22680b20"},
{file = "cffi-1.13.2-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:ccb032fda0873254380aa2bfad2582aedc2959186cce61e3a17abc1a55ff89c3"},
{file = "cffi-1.13.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:dcd65317dd15bc0451f3e01c80da2216a31916bdcffd6221ca1202d96584aa25"},
{file = "cffi-1.13.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:135f69aecbf4517d5b3d6429207b2dff49c876be724ac0c8bf8e1ea99df3d7e5"},
{file = "cffi-1.13.2-cp35-cp35m-win32.whl", hash = "sha256:7b93a885bb13073afb0aa73ad82059a4c41f4b7d8eb8368980448b52d4c7dc2c"},
{file = "cffi-1.13.2-cp35-cp35m-win_amd64.whl", hash = "sha256:e570d3ab32e2c2861c4ebe6ffcad6a8abf9347432a37608fe1fbd157b3f0036b"},
{file = "cffi-1.13.2-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:0e3ea92942cb1168e38c05c1d56b0527ce31f1a370f6117f1d490b8dcd6b3a04"},
{file = "cffi-1.13.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:5ecfa867dea6fabe2a58f03ac9186ea64da1386af2159196da51c4904e11d652"},
{file = "cffi-1.13.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:291f7c42e21d72144bb1c1b2e825ec60f46d0a7468f5346841860454c7aa8f57"},
{file = "cffi-1.13.2-cp36-cp36m-win32.whl", hash = "sha256:62f2578358d3a92e4ab2d830cd1c2049c9c0d0e6d3c58322993cc341bdeac22e"},
{file = "cffi-1.13.2-cp36-cp36m-win_amd64.whl", hash = "sha256:fd43a88e045cf992ed09fa724b5315b790525f2676883a6ea64e3263bae6549d"},
{file = "cffi-1.13.2-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:d75c461e20e29afc0aee7172a0950157c704ff0dd51613506bd7d82b718e7410"},
{file = "cffi-1.13.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:aa00d66c0fab27373ae44ae26a66a9e43ff2a678bf63a9c7c1a9a4d61172827a"},
{file = "cffi-1.13.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:2e9c80a8c3344a92cb04661115898a9129c074f7ab82011ef4b612f645939f12"},
{file = "cffi-1.13.2-cp37-cp37m-win32.whl", hash = "sha256:d754f39e0d1603b5b24a7f8484b22d2904fa551fe865fd0d4c3332f078d20d4e"},
{file = "cffi-1.13.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6471a82d5abea994e38d2c2abc77164b4f7fbaaf80261cb98394d5793f11b12a"},
{file = "cffi-1.13.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:74a1d8c85fb6ff0b30fbfa8ad0ac23cd601a138f7509dc617ebc65ef305bb98d"},
{file = "cffi-1.13.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:42194f54c11abc8583417a7cf4eaff544ce0de8187abaf5d29029c91b1725ad3"},
{file = "cffi-1.13.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:415bdc7ca8c1c634a6d7163d43fb0ea885a07e9618a64bda407e04b04333b7db"},
{file = "cffi-1.13.2-cp38-cp38-win32.whl", hash = "sha256:6d4f18483d040e18546108eb13b1dfa1000a089bcf8529e30346116ea6240506"},
{file = "cffi-1.13.2-cp38-cp38-win_amd64.whl", hash = "sha256:2781e9ad0e9d47173c0093321bb5435a9dfae0ed6a762aabafa13108f5f7b2ba"},
{file = "cffi-1.13.2.tar.gz", hash = "sha256:599a1e8ff057ac530c9ad1778293c665cb81a791421f46922d80a86473c13346"},
{file = "cffi-1.14.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1cae98a7054b5c9391eb3249b86e0e99ab1e02bb0cc0575da191aedadbdf4384"},
{file = "cffi-1.14.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:cf16e3cf6c0a5fdd9bc10c21687e19d29ad1fe863372b5543deaec1039581a30"},
{file = "cffi-1.14.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:f2b0fa0c01d8a0c7483afd9f31d7ecf2d71760ca24499c8697aeb5ca37dc090c"},
{file = "cffi-1.14.0-cp27-cp27m-win32.whl", hash = "sha256:99f748a7e71ff382613b4e1acc0ac83bf7ad167fb3802e35e90d9763daba4d78"},
{file = "cffi-1.14.0-cp27-cp27m-win_amd64.whl", hash = "sha256:c420917b188a5582a56d8b93bdd8e0f6eca08c84ff623a4c16e809152cd35793"},
{file = "cffi-1.14.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:399aed636c7d3749bbed55bc907c3288cb43c65c4389964ad5ff849b6370603e"},
{file = "cffi-1.14.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cab50b8c2250b46fe738c77dbd25ce017d5e6fb35d3407606e7a4180656a5a6a"},
{file = "cffi-1.14.0-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:001bf3242a1bb04d985d63e138230802c6c8d4db3668fb545fb5005ddf5bb5ff"},
{file = "cffi-1.14.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:e56c744aa6ff427a607763346e4170629caf7e48ead6921745986db3692f987f"},
{file = "cffi-1.14.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:b8c78301cefcf5fd914aad35d3c04c2b21ce8629b5e4f4e45ae6812e461910fa"},
{file = "cffi-1.14.0-cp35-cp35m-win32.whl", hash = "sha256:8c0ffc886aea5df6a1762d0019e9cb05f825d0eec1f520c51be9d198701daee5"},
{file = "cffi-1.14.0-cp35-cp35m-win_amd64.whl", hash = "sha256:8a6c688fefb4e1cd56feb6c511984a6c4f7ec7d2a1ff31a10254f3c817054ae4"},
{file = "cffi-1.14.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:95cd16d3dee553f882540c1ffe331d085c9e629499ceadfbda4d4fde635f4b7d"},
{file = "cffi-1.14.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:66e41db66b47d0d8672d8ed2708ba91b2f2524ece3dee48b5dfb36be8c2f21dc"},
{file = "cffi-1.14.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:028a579fc9aed3af38f4892bdcc7390508adabc30c6af4a6e4f611b0c680e6ac"},
{file = "cffi-1.14.0-cp36-cp36m-win32.whl", hash = "sha256:cef128cb4d5e0b3493f058f10ce32365972c554572ff821e175dbc6f8ff6924f"},
{file = "cffi-1.14.0-cp36-cp36m-win_amd64.whl", hash = "sha256:337d448e5a725bba2d8293c48d9353fc68d0e9e4088d62a9571def317797522b"},
{file = "cffi-1.14.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e577934fc5f8779c554639376beeaa5657d54349096ef24abe8c74c5d9c117c3"},
{file = "cffi-1.14.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:62ae9af2d069ea2698bf536dcfe1e4eed9090211dbaafeeedf5cb6c41b352f66"},
{file = "cffi-1.14.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:14491a910663bf9f13ddf2bc8f60562d6bc5315c1f09c704937ef17293fb85b0"},
{file = "cffi-1.14.0-cp37-cp37m-win32.whl", hash = "sha256:c43866529f2f06fe0edc6246eb4faa34f03fe88b64a0a9a942561c8e22f4b71f"},
{file = "cffi-1.14.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2089ed025da3919d2e75a4d963d008330c96751127dd6f73c8dc0c65041b4c26"},
{file = "cffi-1.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3b911c2dbd4f423b4c4fcca138cadde747abdb20d196c4a48708b8a2d32b16dd"},
{file = "cffi-1.14.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:7e63cbcf2429a8dbfe48dcc2322d5f2220b77b2e17b7ba023d6166d84655da55"},
{file = "cffi-1.14.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:3d311bcc4a41408cf5854f06ef2c5cab88f9fded37a3b95936c9879c1640d4c2"},
{file = "cffi-1.14.0-cp38-cp38-win32.whl", hash = "sha256:675686925a9fb403edba0114db74e741d8181683dcf216be697d208857e04ca8"},
{file = "cffi-1.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:00789914be39dffba161cfc5be31b55775de5ba2235fe49aa28c148236c4e06b"},
{file = "cffi-1.14.0.tar.gz", hash = "sha256:2d384f4a127a15ba701207f7639d94106693b6cd64173d6c8988e2c25f3ac2b6"},
]
chardet = [
{file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"},
@ -1066,8 +1060,8 @@ w3lib = [
{file = "w3lib-1.21.0.tar.gz", hash = "sha256:8b1854fef570b5a5fc84d960e025debd110485d73fd283580376104762774315"},
]
werkzeug = [
{file = "Werkzeug-0.16.1-py2.py3-none-any.whl", hash = "sha256:1e0dedc2acb1f46827daa2e399c1485c8fa17c0d8e70b6b875b4e7f54bf408d2"},
{file = "Werkzeug-0.16.1.tar.gz", hash = "sha256:b353856d37dec59d6511359f97f6a4b2468442e454bd1c98298ddce53cac1f04"},
{file = "Werkzeug-1.0.0-py2.py3-none-any.whl", hash = "sha256:6dc65cf9091cf750012f56f2cad759fa9e879f511b5ff8685e456b4e3bf90d16"},
{file = "Werkzeug-1.0.0.tar.gz", hash = "sha256:169ba8a33788476292d04186ab33b01d6add475033dfc07215e6d219cc077096"},
]
"zope.interface" = [
{file = "zope.interface-4.7.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:14157421f4121a57625002cc4f48ac7521ea238d697c4a4459a884b62132b977"},

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "lookyloo"
version = "0.1.0"
version = "1.0"
description = "Web interface to track the trackers."
authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
license = "BSD-3-Clause"

View File

@ -10,3 +10,8 @@ wget -q https://d3js.org/d3.v5.min.js -O web/static/d3.v5.min.js
# Pinned FileSaver.js tag; the source file is downloaded from the GitHub raw
# endpoint for that tag into web/static/.
FileSaver="v2.0.2"
wget -q https://raw.githubusercontent.com/eligrey/FileSaver.js/${FileSaver}/src/FileSaver.js -O web/static/FileSaver.js
# Pinned bootstrap-table version; the minified stylesheet and script for that
# version are downloaded from unpkg into web/static/.
bootstrap_table="1.15.5"
wget -q https://unpkg.com/bootstrap-table@${bootstrap_table}/dist/bootstrap-table.min.css -O web/static/bootstrap-table.min.css
wget -q https://unpkg.com/bootstrap-table@${bootstrap_table}/dist/bootstrap-table.min.js -O web/static/bootstrap-table.min.js

View File

@ -173,5 +173,8 @@ def index():
cached = lookyloo.report_cache(report_dir)
if not cached or 'no_index' in cached:
continue
titles.append((cached['uuid'], cached['title']))
date, time = cached['timestamp'].split('T')
time, _ = time.split('.', 1)
titles.append((cached['uuid'], cached['title'], date, time, cached['url'], cached['redirects']))
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
return render_template('index.html', titles=titles)

View File

@ -111,3 +111,13 @@ hr {
border-style: inset;
border-width: 1px;
}
/* Use the fixed table layout algorithm for every table. */
table {
table-layout: fixed;
}
/* Paragraphs inside table cells: hide content that overflows the box, mark
   clipped text with an ellipsis, and remove the paragraph margin. */
table td p {
overflow: hidden;
text-overflow: ellipsis;
margin: 0;
}

View File

@ -1,16 +1,56 @@
{% extends "main.html" %}
{% block title %}Tree{% endblock %}
{% block title %}Lookyloo{% endblock %}
{% block content %}
<center>
<h2><a href="{{ url_for('scrape_web') }}">Scrape a page</a></h2>
</br></br>
<a href="{{ url_for('scrape_web') }}">
<img src="{{ url_for('static', filename='lookyloo.jpeg') }}"
alt="Lookyloo" width="200">
</a>
</center>
<center>
{% for uuid, page_title in titles %}
<a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a>
</br></br>
{% endfor %}
<h2><a href="{{ url_for('scrape_web') }}">Start a new capture</a></h2>
</br></br>
</center>
<center>
<div class="table-responsive">
<table id="table" class="table" data-toggle="table" data-search="true">
<thead>
<tr>
<th data-width="200">Page</th>
<th data-width="80">Timestamp</th>
<th data-width="200">Redirects</th>
</tr>
</thead>
<tbody>
{% for uuid, page_title, date, time, url, redirects in titles %}
<tr>
<td>
<p title="{{ page_title }}"><a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a></p>
<div id="url">{{ url }}</div>
</td>
<td>{{ date }} {{ time }}</td>
<td>
{% if redirects %}
{% for r in redirects %}
<p title="{{ r }}">
{% if loop.previtem %}
{{ ("&nbsp;" * (loop.index *2) )|safe }}↪ {{ r }}
{%else%}
{{ r }}
{%endif%}
</p>
{% endfor %}
{% else%}
No redirect
{%endif%}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</center>
{% endblock %}

View File

@ -10,21 +10,27 @@
{% block styles %}
<!-- Bootstrap CSS -->
{{ bootstrap.load_css() }}
<link rel="stylesheet" href="{{ url_for('static', filename='tree.css') }}">
<link rel="stylesheet" href="{{ url_for('static', filename='bootstrap-table.min.css') }}">
<link rel="stylesheet" href="{{ url_for('static', filename='tree.css') }}">
{% endblock %}
<title>Lookyloo</title>
<title>
{% block title %}{% endblock%}
</title>
{% endblock %}
</head>
<body>
<!-- Your page contont -->
{% block content %}{% endblock%}
<!-- Your page content -->
<div class="container">
{% block content %}{% endblock%}
</div>
{% block scripts %}
<!-- Optional JavaScript -->
{{ bootstrap.load_js() }}
<script src='{{ url_for('static', filename='FileSaver.js') }}'></script>
<script src='{{ url_for('static', filename='d3.v5.min.js') }}'></script>
<script src='{{ url_for('static', filename='FileSaver.js') }}'></script>
<script src='{{ url_for('static', filename='bootstrap-table.min.js') }}'></script>
{% endblock %}
</body>
</html>

View File

@ -5,7 +5,7 @@
<div class="container">
<center>
<img src="{{ url_for('static', filename='lookyloo.jpeg') }}"
alt="Lookyloo" width="500">
alt="Lookyloo" width="400">
</center>
</br>
<form role="form" action="scrape" method=post enctype=multipart/form-data>