mirror of https://github.com/CIRCL/lookyloo
new: Add initial redirects columns
parent
89edef68ab
commit
4234e44470
|
@ -30,7 +30,7 @@ import logging
|
||||||
|
|
||||||
from pysanejs import SaneJS
|
from pysanejs import SaneJS
|
||||||
from scrapysplashwrapper import crawl
|
from scrapysplashwrapper import crawl
|
||||||
from har2tree import CrawledTree, Har2TreeError
|
from har2tree import CrawledTree, Har2TreeError, HarFile
|
||||||
|
|
||||||
|
|
||||||
class Lookyloo():
|
class Lookyloo():
|
||||||
|
@ -72,17 +72,13 @@ class Lookyloo():
|
||||||
return
|
return
|
||||||
with (report_dir / 'uuid').open() as f:
|
with (report_dir / 'uuid').open() as f:
|
||||||
uuid = f.read().strip()
|
uuid = f.read().strip()
|
||||||
with har_files[0].open() as f:
|
har = HarFile(har_files[0])
|
||||||
j = json.load(f)
|
|
||||||
title = j['log']['pages'][0]['title']
|
cache: Dict[str, Union[str, int]] = {'uuid': uuid,
|
||||||
timestamp = j['log']['pages'][0]['startedDateTime']
|
'title': har.initial_title,
|
||||||
if j['log']['entries']:
|
'timestamp': har.initial_start_time,
|
||||||
first_url = j['log']['entries'][0]['request']['url']
|
'url': har.first_url,
|
||||||
else:
|
'redirects': json.dumps(har.initial_redirects)}
|
||||||
first_url = '-'
|
|
||||||
if not title:
|
|
||||||
title = '!! No title found !! '
|
|
||||||
cache = {'uuid': uuid, 'title': title, 'timestamp': timestamp, 'url': first_url}
|
|
||||||
if (report_dir / 'no_index').exists(): # If the folder claims anonymity
|
if (report_dir / 'no_index').exists(): # If the folder claims anonymity
|
||||||
cache['no_index'] = 1
|
cache['no_index'] = 1
|
||||||
if uuid and not self.redis.exists(str(report_dir)):
|
if uuid and not self.redis.exists(str(report_dir)):
|
||||||
|
@ -92,7 +88,9 @@ class Lookyloo():
|
||||||
def report_cache(self, report_dir: Union[str, Path]) -> Dict:
|
def report_cache(self, report_dir: Union[str, Path]) -> Dict:
|
||||||
if isinstance(report_dir, Path):
|
if isinstance(report_dir, Path):
|
||||||
report_dir = str(report_dir)
|
report_dir = str(report_dir)
|
||||||
return self.redis.hgetall(report_dir)
|
cached = self.redis.hgetall(report_dir)
|
||||||
|
cached['redirects'] = json.loads(cached['redirects'])
|
||||||
|
return cached
|
||||||
|
|
||||||
def _init_existing_dumps(self) -> None:
|
def _init_existing_dumps(self) -> None:
|
||||||
for report_dir in self.report_dirs:
|
for report_dir in self.report_dirs:
|
||||||
|
|
|
@ -208,7 +208,7 @@ lxml = "^4.4.2"
|
||||||
six = "^1.14.0"
|
six = "^1.14.0"
|
||||||
|
|
||||||
[package.source]
|
[package.source]
|
||||||
reference = "12c88d6298e52a8458e220b6910597cb5539daa9"
|
reference = "147ac0f014249358af7b54cf02d5c85644a60645"
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/viper-framework/har2tree.git"
|
url = "https://github.com/viper-framework/har2tree.git"
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
@ -175,6 +175,6 @@ def index():
|
||||||
continue
|
continue
|
||||||
date, time = cached['timestamp'].split('T')
|
date, time = cached['timestamp'].split('T')
|
||||||
time, _ = time.split('.', 1)
|
time, _ = time.split('.', 1)
|
||||||
titles.append((cached['uuid'], cached['title'], date, time, cached['url']))
|
titles.append((cached['uuid'], cached['title'], date, time, cached['url'], cached['redirects']))
|
||||||
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
|
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
|
||||||
return render_template('index.html', titles=titles)
|
return render_template('index.html', titles=titles)
|
||||||
|
|
|
@ -8,23 +8,35 @@
|
||||||
</br></br>
|
</br></br>
|
||||||
</center>
|
</center>
|
||||||
|
|
||||||
<table class="table table-hover" data-toggle="table" data-search="true">
|
<table class="table" data-toggle="table" data-search="true" data-show-columns="true">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
<th scope="col">Page title</th>
|
<th>Page title</th>
|
||||||
<th scope="col">Initial URL</th>
|
<th>Initial URL</th>
|
||||||
<tr>
|
<th>Initial redirects</th>
|
||||||
|
<tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for uuid, page_title, date, time, url in titles %}
|
{% for uuid, page_title, date, time, url, redirects in titles %}
|
||||||
<tr>
|
<tr>
|
||||||
<td>
|
<td>
|
||||||
<a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a>
|
<a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a>
|
||||||
<div id="timestamp"> <b>{{ date }}</b> {{ time }}</div>
|
<div id="timestamp"> <b>{{ date }}</b> {{ time }}</div>
|
||||||
</td>
|
</td>
|
||||||
<td>{{ url }}</td>
|
<td>{{ url }}</td>
|
||||||
</tr>
|
<td>
|
||||||
{% endfor %}
|
{% if redirects %}
|
||||||
|
<ul>
|
||||||
|
{% for r in redirects %}
|
||||||
|
<li>{{ r }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% else%}
|
||||||
|
No redirect
|
||||||
|
{%endif%}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -21,9 +21,13 @@
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<!-- Your page content -->
|
<!-- Your page content -->
|
||||||
|
<!--
|
||||||
<div class="container">
|
<div class="container">
|
||||||
|
-->
|
||||||
{% block content %}{% endblock%}
|
{% block content %}{% endblock%}
|
||||||
|
<!--
|
||||||
</div>
|
</div>
|
||||||
|
-->
|
||||||
|
|
||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
<!-- Optional JavaScript -->
|
<!-- Optional JavaScript -->
|
||||||
|
|
Loading…
Reference in New Issue