chg: cleanup and improve views

pull/86/head
Raphaël Vinot 2020-09-04 18:40:51 +02:00
parent 9f4c77d5d2
commit 457829a23f
6 changed files with 116 additions and 15 deletions

View File

@ -0,0 +1,42 @@
{
"f766df685b673657bdf57551354c149be2024385102854d2ca351e976684bb88361eae848f11f714e6e5973c061440831ea6f5be995b89fd5bd2d4559a0dc4a6": {
"domain": [],
"description": "jQuery v1.12.4 - WordPress 2019-05-16"
},
"9c9616ccbc9765f4e825f6b57fba35e57b97b5ef5f51e88a5fe6d44bf22edbee1a52975f3311fe25d2ca65837b34dcb51cc2e00f02410c54a3aeee6a2c17e255": {
"domain": [],
"description": "Google SafeFrame Container"
},
"cf69087b8f92f7b81efa788c3eb0b8a551405cdc7fa137e09a918349617359715ad5ef833f901e8d6e80c9ff20f63091710b492224e2ad23848673995dff5610": {
"domain": [],
"description": "WordPress - embed - auto generated"
},
"21047fea5269fee75a2a187aa09316519e35068cb2f2f76cfaf371e5224445e9d5c98497bd76fb9608d2b73e9dac1a3f5bfadfdc4623c479d53ecf93d81d3c9f": {
"domain": [],
"description": "Nginx - 301 - HTML"
},
"0344c6b2757d4d787ed4a31ec7043c9dc9bf57017e451f60cecb9ad8f5febf64acf2a6c996346ae4b23297623ebf747954410aee27ee3c2f3c6ccd15a15d0f2d": {
"domain": [],
"description": "Nginx - 301 - HTML"
},
"e423354c2083d0c889a488186322c5bf045f0e5dfa04db55d1625d21a0b4862a1d357aed0463b5e9d2659f7a8427c2c78da4084c1c741a5db7ab4742f8b55304": {
"domain": [],
"description": "jQuery UI CSS Framework 1.8.20"
},
"b828576537cff413f37461f6a10bf6fc97cfcd256afb2f65d07ae552bbc8a639de1d84ed55fcade3682996da960d3f44e086ac56aa5f596b8607d9d118bb47ef": {
"domain": [],
"description": "Transparent PNG"
},
"22142edb5016c6d74fef35af858439a3d314021ea7822bd65a00bcf35bed39576e490fb74dc2c04d32250178eb228db9a2ceeee290cf63aacb4f03741ad45949": {
"domain": [],
"description": "1px PNG"
},
"43de6d36c775ce0f23813bc8ca401633762d0d7abd1945d4f8490f81ff7623d49ef423f4d63362c4ea57d58038d8edf3ad2d06869f4c4fc9d88c0e64c4a19470": {
"domain": [],
"description": "Gravatar unknown image"
},
"c99bf4f1351efb28a74fa2504429875d9a63eb2d6a145a060ed487f83ff3a42b6c85d94165b960edca90aceec58d16a6ed37b25f44452bbacd7f5204c15c23cc": {
"domain": [],
"description": "Nginx - 302 - HTML"
}
}

View File

@ -115,8 +115,8 @@ class Indexing():
return self.redis.zcard(f'bh|{h}')
def body_hash_fequency(self, body_hash: str) -> Dict[str, float]:
    """Return the frequency counters stored for a body hash, as integers.

    :param body_hash: hash of the resource body to look up.
    :return: a dict with two keys:
        ``hash_freq`` - score of ``body_hash`` in the ``body_hashes`` sorted
        set (0 when the hash is not a member of that set);
        ``hash_domains_freq`` - number of members of the ``bh|<body_hash>``
        sorted set.
    """
    # ZSCORE yields None for a member that is not in the sorted set;
    # report 0 rather than failing on int(None).
    hash_freq = self.redis.zscore('body_hashes', body_hash)
    return {'hash_freq': int(hash_freq) if hash_freq is not None else 0,
            'hash_domains_freq': int(self.redis.zcard(f'bh|{body_hash}'))}
def index_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid):
@ -143,17 +143,23 @@ class Indexing():
return capture_uuid, urlnode_uuid, hostnode_uuid
def get_body_hash_captures(self, body_hash: str, filter_url: Optional[str]=None,
                           filter_capture_uuid: Optional[str]=None,
                           limit: int=20) -> Tuple[int, List[Tuple[str, str, str, bool]]]:
    """List the places where a body hash was seen in other captures.

    :param body_hash: hash of the resource body to look up.
    :param filter_url: when set, each hit is flagged with whether its URL
        is exactly this one; otherwise the flag is always False.
    :param filter_capture_uuid: capture to leave out of both the count and
        the hits (used to skip hits in the current capture).
    :param limit: maximum number of captures whose hits are expanded.
    :return: ``(total, hits)`` where ``total`` is the number of captures
        referencing the hash (excluding ``filter_capture_uuid``) and
        ``hits`` is a list of
        ``(capture_uuid, hostnode_uuid, hostname, same_url)`` tuples for at
        most ``limit`` of those captures.
    """
    # Drop the excluded capture *before* counting and slicing: the set is
    # unordered, so doing it inside the limited loop would make the total
    # depend on iteration order and waste one slot of the limit.
    captures = [capture_uuid
                for capture_uuid in self.redis.smembers(f'bh|{body_hash}|captures')
                if capture_uuid != filter_capture_uuid]
    to_return: List[Tuple[str, str, str, bool]] = []
    for capture_uuid in captures[:limit]:
        for entry in self.redis.zrevrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, -1):
            # Entries look like '<url_uuid>|<hostnode_uuid>|<url>'; maxsplit
            # keeps any '|' that is part of the URL itself.
            _, hostnode_uuid, url = entry.split('|', 2)
            same_url = bool(filter_url) and url == filter_url
            to_return.append((capture_uuid, hostnode_uuid, urlsplit(url).hostname, same_url))
    return len(captures), to_return
def get_body_hash_domains(self, body_hash: str) -> List[Tuple[str, float]]:
    """Return every member of the ``bh|<body_hash>`` sorted set with its score.

    The full range is fetched in reverse order (``zrevrange`` with
    ``withscores=True``). Members are presumably domain names - confirm
    against the indexing code.
    """
    key = f'bh|{body_hash}'
    members_with_scores = self.redis.zrevrange(key, 0, -1, withscores=True)
    return members_with_scores
@ -1070,11 +1076,8 @@ class Lookyloo():
def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> Tuple[int, Dict[str, List[Tuple[str, str, str, str, str]]]]:
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url)
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid)
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
if h_capture_uuid == capture_uuid:
# Skip self.
continue
cache = self.capture_cache(h_capture_uuid)
if cache:
if same_url:

6
poetry.lock generated
View File

@ -948,7 +948,7 @@ description = "Traitlets Python configuration system"
name = "traitlets"
optional = false
python-versions = ">=3.7"
version = "5.0.0"
version = "5.0.3"
[package.dependencies]
ipython-genutils = "*"
@ -1639,8 +1639,8 @@ toml = [
{file = "toml-0.10.1.tar.gz", hash = "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f"},
]
traitlets = [
{file = "traitlets-5.0.0-py3-none-any.whl", hash = "sha256:62a037f12ccb823fb05823afbe35fe0273bc18fa3202d0cf0ea8f24e97e464be"},
{file = "traitlets-5.0.0.tar.gz", hash = "sha256:0d9c4005506b306b0a99551e96174b8bedc675c2dd048f92b3bbbb7d86ac93a9"},
{file = "traitlets-5.0.3-py3-none-any.whl", hash = "sha256:8bdadb17a04c844f444cdefaa3dee47a12ff14cf6277b9eeda29bfa0659d5987"},
{file = "traitlets-5.0.3.tar.gz", hash = "sha256:a2e91709a0330b6c5d497ed470b2feb1ed8da5c9dd807c6daab41f727b9391c9"},
]
twisted = [
{file = "Twisted-20.3.0-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:cdbc4c7f0cd7a2218b575844e970f05a1be1861c607b0e048c9bceca0c4d42f7"},

View File

@ -43,7 +43,6 @@
</script>
<script>
let whereAmI = (hostnode_uuid) => window.opener.LocateNode(hostnode_uuid);
let flag = (hostnode_uuid) => window.opener.NodeHighlight(hostnode_uuid);
// Delegate to the opener window to open a capture's tree in a new tab.
// hostnode_uuid is optional; it defaults to null (JavaScript has no `Null` identifier).
let openTreeInNewTab = (capture_uuid, hostnode_uuid=null) => window.opener.openTreeInNewTab(capture_uuid, hostnode_uuid);
</script>
<script>
@ -63,8 +62,8 @@
{% block content %}
{# Headers #}
<center>
<h3>{{ hostname }}</h3>
<button type="button" class="btn btn-info" onclick="whereAmI('{{ hostode_uuid }}')">Locate in tree</button>
<h3>{{ hostname }}</h3>
<button type="button" class="btn btn-info" onclick="whereAmI('{{ hostnode_uuid }}')">Locate in tree</button>
<a href="{{ url_for('hostnode_details_text', tree_uuid=tree_uuid, node_uuid=hostnode_uuid) }}" class="btn btn-info" role="button">Download URLs as text</a>
</center>
{# Start list of URLs #}

View File

@ -104,6 +104,9 @@ Body size: {{ sizeof_fmt(ressource_size) }}
{# Lists of other captures loading the same content... #}
<div class="collapse" id="captureslist_{{ identifier_for_toggle }}">
<div class="card card-body">
{% if total_captures > 20 %}
Note that only 20 captures are displayed here.
{% endif %}
{% if other_captures['different_url']|length > 0 %}
{# ... on other URLs #}
<div>
@ -121,8 +124,6 @@ Body size: {{ sizeof_fmt(ressource_size) }}
{% endif %}
</div>
</div>
{% else %}
<p>This file is loaded multiple times in this capture.</p>
{% endif %}
{% endmacro %}

View File

@ -0,0 +1,56 @@
{% extends "main.html" %}
{% from 'bootstrap/utils.html' import render_messages %}
{% from 'macros.html' import shorten_string %}
{% from 'macros.html' import get_ressource_button %}
{% from 'macros.html' import context_form %}
{% block title %}Ressources{% endblock %}
{% block scripts %}
{{ super() }}
<script src='{{ url_for('static', filename='datatables.min.js') }}'></script>
<script type="text/javascript">
$('#table').DataTable( {
"order": [[ 2, "desc" ]],
"pageLength": 500
});
</script>
{% endblock %}
{% block styles %}
{{ super() }}
<link rel="stylesheet" href="{{ url_for('static', filename='datatables.min.css') }}">
{% endblock %}
{% block content %}
<div class="table-responsive">
<table id="table" class="table" style="width:96%">
<thead>
<tr>
<th>SHA 512</th>
<th>Frequency</th>
<th>Number unique domains</th>
<th>Context</th>
</tr>
</thead>
<tbody>
{% for h, freq, number_domains, context, capture_uuid, urlnode_uuid, hostnode_uuid in ressources %}
<tr>
<td>
<a href="{{ url_for('body_hash_details', body_hash=h) }}">{{ shorten_string(h, 20) }}</a><br>
{{ get_ressource_button(capture_uuid, urlnode_uuid, h, 'Download sample') }}
</td>
<td>{{ freq }}</td>
<td>{{ number_domains }}</td>
<td> {{ context['type'] }} - {{ context['details'] }}<br>
{{ context_form(capture_uuid, urlnode_uuid, hostnode_uuid, h, 'ressources') }}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}