diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index e634376..7f54fb3 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -30,7 +30,7 @@ import logging from pysanejs import SaneJS from scrapysplashwrapper import crawl -from har2tree import CrawledTree, Har2TreeError +from har2tree import CrawledTree, Har2TreeError, HarFile class Lookyloo(): @@ -72,12 +72,13 @@ class Lookyloo(): return with (report_dir / 'uuid').open() as f: uuid = f.read().strip() - with har_files[0].open() as f: - j = json.load(f) - title = j['log']['pages'][0]['title'] - if not title: - title = '!! No title found !! ' - cache = {'uuid': uuid, 'title': title} + har = HarFile(har_files[0]) + + cache: Dict[str, Union[str, int]] = {'uuid': uuid, + 'title': har.initial_title, + 'timestamp': har.initial_start_time, + 'url': har.first_url, + 'redirects': json.dumps(har.initial_redirects)} if (report_dir / 'no_index').exists(): # If the folders claims anonymity cache['no_index'] = 1 if uuid and not self.redis.exists(str(report_dir)): @@ -87,7 +88,9 @@ class Lookyloo(): def report_cache(self, report_dir: Union[str, Path]) -> Dict: if isinstance(report_dir, Path): report_dir = str(report_dir) - return self.redis.hgetall(report_dir) + cached = self.redis.hgetall(report_dir) + cached['redirects'] = json.loads(cached['redirects']) + return cached def _init_existing_dumps(self) -> None: for report_dir in self.report_dirs: diff --git a/poetry.lock b/poetry.lock index 8d9e983..0665d00 100644 --- a/poetry.lock +++ b/poetry.lock @@ -80,7 +80,7 @@ description = "Foreign Function Interface for Python calling C code." name = "cffi" optional = false python-versions = "*" -version = "1.13.2" +version = "1.14.0" [package.dependencies] pycparser = "*" @@ -199,7 +199,7 @@ description = "" name = "har2tree" optional = false python-versions = "^3.6" -version = "0.1.0" +version = "1.0" [package.dependencies] beautifulsoup4 = "^4.8.2" @@ -208,7 +208,7 @@ lxml = "^4.4.2" six = "^1.14.0" [package.source] -reference = "12c88d6298e52a8458e220b6910597cb5539daa9" +reference = "ff0f6294728902e7e36c294e5b51ac51ba894a03" type = "git" url = "https://github.com/viper-framework/har2tree.git" [[package]] @@ -431,13 +431,13 @@ description = "" name = "pysanejs" optional = false python-versions = "^3.6" -version = "0.1.0" +version = "1.0" [package.dependencies] requests = "^2.22.0" [package.source] -reference = "bdc091fbae7019c39b47a149b12f8ac032eda2a3" +reference = "3ea143f44d37ab701c70ffb38408528ddb4e2b6e" type = "git" url = "https://github.com/CIRCL/PySaneJS.git" [[package]] @@ -529,14 +529,14 @@ description = "" name = "scrapysplashwrapper" optional = false python-versions = "^3.6" -version = "0.1.0" +version = "1.0" [package.dependencies] scrapy = "^1.8.0" scrapy-splash = "^0.7.2" [package.source] -reference = "2bbc9c1dd405993bc775e62447fac3355dfbde74" +reference = "d781ff5867504f50ce9411fc7cad1a653dd2a02d" type = "git" url = "https://github.com/viper-framework/ScrapySplashWrapper.git" [[package]] @@ -650,12 +650,11 @@ category = "main" description = "The comprehensive WSGI web application library." name = "werkzeug" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -version = "0.16.1" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +version = "1.0.0" [package.extras] dev = ["pytest", "coverage", "tox", "sphinx", "pallets-sphinx-themes", "sphinx-issues"] -termcolor = ["termcolor"] watchdog = ["watchdog"] [[package]] @@ -726,39 +725,34 @@ certifi = [ {file = "certifi-2019.11.28.tar.gz", hash = "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f"}, ] cffi = [ - {file = "cffi-1.13.2-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:3c9fff570f13480b201e9ab69453108f6d98244a7f495e91b6c654a47486ba43"}, - {file = "cffi-1.13.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:2c5e309ec482556397cb21ede0350c5e82f0eb2621de04b2633588d118da4396"}, - {file = "cffi-1.13.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:19db0cdd6e516f13329cba4903368bff9bb5a9331d3410b1b448daaadc495e54"}, - {file = "cffi-1.13.2-cp27-cp27m-win32.whl", hash = "sha256:5c4fae4e9cdd18c82ba3a134be256e98dc0596af1e7285a3d2602c97dcfa5159"}, - {file = "cffi-1.13.2-cp27-cp27m-win_amd64.whl", hash = "sha256:32a262e2b90ffcfdd97c7a5e24a6012a43c61f1f5a57789ad80af1d26c6acd97"}, - {file = "cffi-1.13.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:4a43c91840bda5f55249413037b7a9b79c90b1184ed504883b72c4df70778579"}, - {file = "cffi-1.13.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8169cf44dd8f9071b2b9248c35fc35e8677451c52f795daa2bb4643f32a540bc"}, - {file = "cffi-1.13.2-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:71a608532ab3bd26223c8d841dde43f3516aa5d2bf37b50ac410bb5e99053e8f"}, - {file = "cffi-1.13.2-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:7f627141a26b551bdebbc4855c1157feeef18241b4b8366ed22a5c7d672ef858"}, - {file = "cffi-1.13.2-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:0b49274afc941c626b605fb59b59c3485c17dc776dc3cc7cc14aca74cc19cc42"}, - {file = "cffi-1.13.2-cp34-cp34m-win32.whl", hash = "sha256:4424e42199e86b21fc4db83bd76909a6fc2a2aefb352cb5414833c030f6ed71b"}, - {file = "cffi-1.13.2-cp34-cp34m-win_amd64.whl", hash = "sha256:7d4751da932caaec419d514eaa4215eaf14b612cff66398dd51129ac22680b20"}, - {file = "cffi-1.13.2-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:ccb032fda0873254380aa2bfad2582aedc2959186cce61e3a17abc1a55ff89c3"}, - {file = "cffi-1.13.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:dcd65317dd15bc0451f3e01c80da2216a31916bdcffd6221ca1202d96584aa25"}, - {file = "cffi-1.13.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:135f69aecbf4517d5b3d6429207b2dff49c876be724ac0c8bf8e1ea99df3d7e5"}, - {file = "cffi-1.13.2-cp35-cp35m-win32.whl", hash = "sha256:7b93a885bb13073afb0aa73ad82059a4c41f4b7d8eb8368980448b52d4c7dc2c"}, - {file = "cffi-1.13.2-cp35-cp35m-win_amd64.whl", hash = "sha256:e570d3ab32e2c2861c4ebe6ffcad6a8abf9347432a37608fe1fbd157b3f0036b"}, - {file = "cffi-1.13.2-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:0e3ea92942cb1168e38c05c1d56b0527ce31f1a370f6117f1d490b8dcd6b3a04"}, - {file = "cffi-1.13.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:5ecfa867dea6fabe2a58f03ac9186ea64da1386af2159196da51c4904e11d652"}, - {file = "cffi-1.13.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:291f7c42e21d72144bb1c1b2e825ec60f46d0a7468f5346841860454c7aa8f57"}, - {file = "cffi-1.13.2-cp36-cp36m-win32.whl", hash = "sha256:62f2578358d3a92e4ab2d830cd1c2049c9c0d0e6d3c58322993cc341bdeac22e"}, - {file = "cffi-1.13.2-cp36-cp36m-win_amd64.whl", hash = "sha256:fd43a88e045cf992ed09fa724b5315b790525f2676883a6ea64e3263bae6549d"}, - {file = "cffi-1.13.2-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:d75c461e20e29afc0aee7172a0950157c704ff0dd51613506bd7d82b718e7410"}, - {file = "cffi-1.13.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:aa00d66c0fab27373ae44ae26a66a9e43ff2a678bf63a9c7c1a9a4d61172827a"}, - {file = "cffi-1.13.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:2e9c80a8c3344a92cb04661115898a9129c074f7ab82011ef4b612f645939f12"}, - {file = "cffi-1.13.2-cp37-cp37m-win32.whl", hash = "sha256:d754f39e0d1603b5b24a7f8484b22d2904fa551fe865fd0d4c3332f078d20d4e"}, - {file = "cffi-1.13.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6471a82d5abea994e38d2c2abc77164b4f7fbaaf80261cb98394d5793f11b12a"}, - {file = "cffi-1.13.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:74a1d8c85fb6ff0b30fbfa8ad0ac23cd601a138f7509dc617ebc65ef305bb98d"}, - {file = "cffi-1.13.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:42194f54c11abc8583417a7cf4eaff544ce0de8187abaf5d29029c91b1725ad3"}, - {file = "cffi-1.13.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:415bdc7ca8c1c634a6d7163d43fb0ea885a07e9618a64bda407e04b04333b7db"}, - {file = "cffi-1.13.2-cp38-cp38-win32.whl", hash = "sha256:6d4f18483d040e18546108eb13b1dfa1000a089bcf8529e30346116ea6240506"}, - {file = "cffi-1.13.2-cp38-cp38-win_amd64.whl", hash = "sha256:2781e9ad0e9d47173c0093321bb5435a9dfae0ed6a762aabafa13108f5f7b2ba"}, - {file = "cffi-1.13.2.tar.gz", hash = "sha256:599a1e8ff057ac530c9ad1778293c665cb81a791421f46922d80a86473c13346"}, + {file = "cffi-1.14.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1cae98a7054b5c9391eb3249b86e0e99ab1e02bb0cc0575da191aedadbdf4384"}, + {file = "cffi-1.14.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:cf16e3cf6c0a5fdd9bc10c21687e19d29ad1fe863372b5543deaec1039581a30"}, + {file = "cffi-1.14.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:f2b0fa0c01d8a0c7483afd9f31d7ecf2d71760ca24499c8697aeb5ca37dc090c"}, + {file = "cffi-1.14.0-cp27-cp27m-win32.whl", hash = "sha256:99f748a7e71ff382613b4e1acc0ac83bf7ad167fb3802e35e90d9763daba4d78"}, + {file = "cffi-1.14.0-cp27-cp27m-win_amd64.whl", hash = "sha256:c420917b188a5582a56d8b93bdd8e0f6eca08c84ff623a4c16e809152cd35793"}, + {file = "cffi-1.14.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:399aed636c7d3749bbed55bc907c3288cb43c65c4389964ad5ff849b6370603e"}, + {file = "cffi-1.14.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cab50b8c2250b46fe738c77dbd25ce017d5e6fb35d3407606e7a4180656a5a6a"}, + {file = "cffi-1.14.0-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:001bf3242a1bb04d985d63e138230802c6c8d4db3668fb545fb5005ddf5bb5ff"}, + {file = "cffi-1.14.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:e56c744aa6ff427a607763346e4170629caf7e48ead6921745986db3692f987f"}, + {file = "cffi-1.14.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:b8c78301cefcf5fd914aad35d3c04c2b21ce8629b5e4f4e45ae6812e461910fa"}, + {file = "cffi-1.14.0-cp35-cp35m-win32.whl", hash = "sha256:8c0ffc886aea5df6a1762d0019e9cb05f825d0eec1f520c51be9d198701daee5"}, + {file = "cffi-1.14.0-cp35-cp35m-win_amd64.whl", hash = "sha256:8a6c688fefb4e1cd56feb6c511984a6c4f7ec7d2a1ff31a10254f3c817054ae4"}, + {file = "cffi-1.14.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:95cd16d3dee553f882540c1ffe331d085c9e629499ceadfbda4d4fde635f4b7d"}, + {file = "cffi-1.14.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:66e41db66b47d0d8672d8ed2708ba91b2f2524ece3dee48b5dfb36be8c2f21dc"}, + {file = "cffi-1.14.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:028a579fc9aed3af38f4892bdcc7390508adabc30c6af4a6e4f611b0c680e6ac"}, + {file = "cffi-1.14.0-cp36-cp36m-win32.whl", hash = "sha256:cef128cb4d5e0b3493f058f10ce32365972c554572ff821e175dbc6f8ff6924f"}, + {file = "cffi-1.14.0-cp36-cp36m-win_amd64.whl", hash = "sha256:337d448e5a725bba2d8293c48d9353fc68d0e9e4088d62a9571def317797522b"}, + {file = "cffi-1.14.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e577934fc5f8779c554639376beeaa5657d54349096ef24abe8c74c5d9c117c3"}, + {file = "cffi-1.14.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:62ae9af2d069ea2698bf536dcfe1e4eed9090211dbaafeeedf5cb6c41b352f66"}, + {file = "cffi-1.14.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:14491a910663bf9f13ddf2bc8f60562d6bc5315c1f09c704937ef17293fb85b0"}, + {file = "cffi-1.14.0-cp37-cp37m-win32.whl", hash = "sha256:c43866529f2f06fe0edc6246eb4faa34f03fe88b64a0a9a942561c8e22f4b71f"}, + {file = "cffi-1.14.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2089ed025da3919d2e75a4d963d008330c96751127dd6f73c8dc0c65041b4c26"}, + {file = "cffi-1.14.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3b911c2dbd4f423b4c4fcca138cadde747abdb20d196c4a48708b8a2d32b16dd"}, + {file = "cffi-1.14.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:7e63cbcf2429a8dbfe48dcc2322d5f2220b77b2e17b7ba023d6166d84655da55"}, + {file = "cffi-1.14.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:3d311bcc4a41408cf5854f06ef2c5cab88f9fded37a3b95936c9879c1640d4c2"}, + {file = "cffi-1.14.0-cp38-cp38-win32.whl", hash = "sha256:675686925a9fb403edba0114db74e741d8181683dcf216be697d208857e04ca8"}, + {file = "cffi-1.14.0-cp38-cp38-win_amd64.whl", hash = "sha256:00789914be39dffba161cfc5be31b55775de5ba2235fe49aa28c148236c4e06b"}, + {file = "cffi-1.14.0.tar.gz", hash = "sha256:2d384f4a127a15ba701207f7639d94106693b6cd64173d6c8988e2c25f3ac2b6"}, ] chardet = [ {file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"}, @@ -1066,8 +1060,8 @@ w3lib = [ {file = "w3lib-1.21.0.tar.gz", hash = "sha256:8b1854fef570b5a5fc84d960e025debd110485d73fd283580376104762774315"}, ] werkzeug = [ - {file = "Werkzeug-0.16.1-py2.py3-none-any.whl", hash = "sha256:1e0dedc2acb1f46827daa2e399c1485c8fa17c0d8e70b6b875b4e7f54bf408d2"}, - {file = "Werkzeug-0.16.1.tar.gz", hash = "sha256:b353856d37dec59d6511359f97f6a4b2468442e454bd1c98298ddce53cac1f04"}, + {file = "Werkzeug-1.0.0-py2.py3-none-any.whl", hash = "sha256:6dc65cf9091cf750012f56f2cad759fa9e879f511b5ff8685e456b4e3bf90d16"}, + {file = "Werkzeug-1.0.0.tar.gz", hash = "sha256:169ba8a33788476292d04186ab33b01d6add475033dfc07215e6d219cc077096"}, ] "zope.interface" = [ {file = "zope.interface-4.7.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:14157421f4121a57625002cc4f48ac7521ea238d697c4a4459a884b62132b977"}, diff --git a/pyproject.toml b/pyproject.toml index c09f1e3..633785b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lookyloo" -version = "0.1.0" +version = "1.0" description = "Web interface to track the trackers." authors = ["Raphaël Vinot "] license = "BSD-3-Clause" diff --git a/website/3rdparty.sh b/website/3rdparty.sh index ec53fde..df422a6 100755 --- a/website/3rdparty.sh +++ b/website/3rdparty.sh @@ -10,3 +10,8 @@ wget -q https://d3js.org/d3.v5.min.js -O web/static/d3.v5.min.js FileSaver="v2.0.2" wget -q https://raw.githubusercontent.com/eligrey/FileSaver.js/${FileSaver}/src/FileSaver.js -O web/static/FileSaver.js + +bootstrap_table="1.15.5" + +wget -q https://unpkg.com/bootstrap-table@${bootstrap_table}/dist/bootstrap-table.min.css -O web/static/bootstrap-table.min.css +wget -q https://unpkg.com/bootstrap-table@${bootstrap_table}/dist/bootstrap-table.min.js -O web/static/bootstrap-table.min.js diff --git a/website/web/__init__.py b/website/web/__init__.py index 11e90d5..eea34a7 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -173,5 +173,8 @@ def index(): cached = lookyloo.report_cache(report_dir) if not cached or 'no_index' in cached: continue - titles.append((cached['uuid'], cached['title'])) + date, time = cached['timestamp'].split('T') + time, _ = time.split('.', 1) + titles.append((cached['uuid'], cached['title'], date, time, cached['url'], cached['redirects'])) + titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True) return render_template('index.html', titles=titles) diff --git a/website/web/static/tree.css b/website/web/static/tree.css index 74e7239..286a4fc 100644 --- a/website/web/static/tree.css +++ b/website/web/static/tree.css @@ -111,3 +111,13 @@ hr { border-style: inset; border-width: 1px; } + +table { + table-layout: fixed; +} + +table td p { + overflow: hidden; + text-overflow: ellipsis; + margin: 0; +} diff --git a/website/web/templates/index.html b/website/web/templates/index.html index ffc5517..38f6260 100644 --- a/website/web/templates/index.html +++ b/website/web/templates/index.html @@ -1,16 +1,56 @@ {% extends "main.html" %} -{% block title %}Tree{% endblock %} +{% block title %}Lookyloo{% endblock %} {% block content %}
-

Scrape a page

-

+ + Lookyloo +
- {% for uuid, page_title in titles %} - {{ page_title }} -

- {% endfor %} +

Start a new capture

+

+
+ +
+
+ + + + + + + + + + {% for uuid, page_title, date, time, url, redirects in titles %} + + + + + + {% endfor %} + +
PageTimestampRedirects
+

{{ page_title }}

+
{{ url }}
+
{{ date }} {{ time }} + {% if redirects %} + {% for r in redirects %} +

+ {% if loop.previtem %} + {{ (" " * (loop.index *2) )|safe }}↪ {{ r }} + {%else%} + {{ r }} + {%endif%} +

+ {% endfor %} + {% else%} + No redirect + {%endif%} +
+
{% endblock %} diff --git a/website/web/templates/main.html b/website/web/templates/main.html index 80f7849..f93a3b3 100644 --- a/website/web/templates/main.html +++ b/website/web/templates/main.html @@ -10,21 +10,27 @@ {% block styles %} {{ bootstrap.load_css() }} - + + {% endblock %} - Lookyloo + + {% block title %}{% endblock%} + {% endblock %} - - {% block content %}{% endblock%} + +
+ {% block content %}{% endblock%} +
{% block scripts %} {{ bootstrap.load_js() }} - + + {% endblock %} diff --git a/website/web/templates/scrape.html b/website/web/templates/scrape.html index 6747fd2..6d5c640 100644 --- a/website/web/templates/scrape.html +++ b/website/web/templates/scrape.html @@ -5,7 +5,7 @@
Lookyloo + alt="Lookyloo" width="400">