new: Paginate resources

pull/1008/head
Raphaël Vinot 2024-12-02 13:40:36 +01:00
parent e0faad5490
commit 359d9b8511
6 changed files with 92 additions and 66 deletions

View File

@ -321,21 +321,23 @@ class Indexing():
return None
def get_captures_body_hash(self, body_hash: str, most_recent_capture: datetime | None = None,
oldest_capture: datetime | None = None) -> list[tuple[str, float]]:
oldest_capture: datetime | None = None,
offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
'''Get the captures matching the hash.
:param body_hash: The hash to search for
:param filter_capture_uuid: UUID of the capture the hash was found in
'''
max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=15)).timestamp()
min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
if self.redis.type(f'bh|{body_hash}|captures') == 'set': # type: ignore[no-untyped-call]
# triggers the re-index soon.
self.redis.srem('indexed_body_hashes', *self.redis.smembers(f'bh|{body_hash}|captures'))
self.redis.delete(f'bh|{body_hash}|captures')
return []
return self.redis.zrevrangebyscore(f'body_hashes|{body_hash}|captures', max_score, min_score, withscores=True)
return 0, []
total = self.redis.zcard(f'body_hashes|{body_hash}|captures')
return total, self.redis.zrevrangebyscore(f'body_hashes|{body_hash}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
def get_capture_body_hash_nodes(self, capture_uuid: str, body_hash: str) -> set[str]:
if url_nodes := self.redis.smembers(f'capture_indexes|{capture_uuid}|body_hashes|{body_hash}'):
@ -343,8 +345,10 @@ class Indexing():
return set()
def get_body_hash_urlnodes(self, body_hash: str) -> dict[str, set[str]]:
    '''Get the URL nodes the body hash was seen in, grouped by capture.

    :param body_hash: The hash to search for
    :return: For at most 100 captures, the capture UUID mapped to the members of the
             capture_indexes|<capture UUID>|body_hashes|<hash> set
    '''
    # FIXME: figure out a reasonable limit for that
    # The offset is passed explicitly: redis-py refuses a limit without an offset.
    _, entries = self.get_captures_body_hash(body_hash, offset=0, limit=100)
    return {capture_uuid: self.redis.smembers(f'capture_indexes|{capture_uuid}|body_hashes|{body_hash}')
            for capture_uuid, _ in entries}
# ###### HTTP Headers Hashes ######

View File

@ -339,11 +339,10 @@ def handle_pydandic_validation_exception(error: CaptureSettingsError) -> Respons
# ##### Methods querying the indexes #####
def _get_body_hash_investigator(body_hash: str, /) -> list[tuple[str, str, datetime, str, str]]:
def _get_body_hash_investigator(body_hash: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, datetime, str, str]]]:
'''Returns all the captures related to a hash (sha512), used in the web interface.'''
cached_captures = lookyloo.sorted_capture_cache(
[uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_body_hash(body_hash)],
cached_captures_only=True)
total, entries = get_indexing(flask_login.current_user).get_captures_body_hash(body_hash=body_hash, offset=offset, limit=limit)
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
captures = []
for cache in cached_captures:
if not cache:
@ -354,7 +353,7 @@ def _get_body_hash_investigator(body_hash: str, /) -> list[tuple[str, str, datet
except IndexError:
continue
captures.append((cache.uuid, cache.title, cache.timestamp, urlnode.hostnode_uuid, urlnode.name))
return captures
return total, captures
def get_all_body_hashes(capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]:
@ -401,9 +400,7 @@ def get_all_urls(capture_uuid: str, /) -> dict[str, dict[str, int | list[URLNode
def get_hostname_investigator(hostname: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime, set[str]]]]:
    '''Returns one page of the captures loading content from that hostname, used in the web interface.

    :param hostname: The hostname to search for
    :param offset: Forwarded to the index lookup (pagination)
    :param limit: Forwarded to the index lookup (pagination)
    :return: The total reported by the index, and for each capture of the page:
             UUID, title, last redirect, timestamp, and the nodes of the capture matching the hostname
    '''
    # Resolve the index once, it is needed for the lookup and for every row.
    indexing = get_indexing(flask_login.current_user)
    total, entries = indexing.get_captures_hostname(hostname=hostname, offset=offset, limit=limit)
    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
    return total, [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp,
                    indexing.get_capture_hostname_nodes(cache.uuid, hostname)
                    ) for cache in cached_captures]
@ -412,9 +409,7 @@ def get_hostname_investigator(hostname: str, offset: int | None=None, limit: int
def get_url_investigator(url: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime, set[str]]]]:
    '''Returns one page of the captures loading content from that url, used in the web interface.

    :param url: The URL to search for
    :param offset: Forwarded to the index lookup (pagination)
    :param limit: Forwarded to the index lookup (pagination)
    :return: The total reported by the index, and for each capture of the page:
             UUID, title, last redirect, timestamp, and the nodes of the capture matching the URL
    '''
    # Resolve the index once, it is needed for the lookup and for every row.
    indexing = get_indexing(flask_login.current_user)
    total, entries = indexing.get_captures_url(url=url, offset=offset, limit=limit)
    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
    return total, [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp,
                    indexing.get_capture_url_nodes(cache.uuid, url)
                    ) for cache in cached_captures]
@ -478,9 +473,9 @@ def hash_lookup(blob_hash: str, url: str, current_capture_uuid: str) -> tuple[in
If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
Capture UUID avoids duplicates on the same capture'''
captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
_, entries = get_indexing(flask_login.current_user).get_captures_body_hash(blob_hash, oldest_capture=datetime.now() - timedelta(**time_delta_on_index))
cached_captures = lookyloo.sorted_capture_cache(
[uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_body_hash(blob_hash,
oldest_capture=datetime.now() - timedelta(**time_delta_on_index))],
[uuid for uuid, _ in entries],
cached_captures_only=True)
for cache in cached_captures:
if cache.uuid == current_capture_uuid:
@ -1441,7 +1436,8 @@ def ressources() -> str:
freq = get_indexing(flask_login.current_user).get_captures_body_hash_count(h)
context = lookyloo.context.find_known_content(h)
# Only get the recent captures
for capture_uuid, capture_ts in get_indexing(flask_login.current_user).get_captures_body_hash(h):
_, entries = get_indexing(flask_login.current_user).get_captures_body_hash(h, oldest_capture=datetime.now() - timedelta(**time_delta_on_index))
for capture_uuid, capture_ts in entries:
url_nodes = get_indexing(flask_login.current_user).get_capture_body_hash_nodes(capture_uuid, h)
url_node = url_nodes.pop()
ressource = lookyloo.get_ressource(capture_uuid, url_node, h)
@ -1778,8 +1774,7 @@ def favicon_detail(favicon_sha512: str) -> str:
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
def body_hash_details(body_hash: str) -> str:
    '''Render the details page of a body hash.

    The captures are not passed to the template: the table of the page
    fetches them page by page from the /tables/bodyHashDetailsTable/ endpoint.
    '''
    # The flag is only set when the query parameter is exactly 'True'.
    from_popup = request.args.get('from_popup') == 'True'
    return render_template('body_hash.html', body_hash=body_hash, from_popup=from_popup)
@app.route('/urls/<string:url>', methods=['GET'])
@ -1793,7 +1788,6 @@ def url_details(url: str) -> str:
@app.route('/hostnames/<string:hostname>', methods=['GET'])
def hostname_details(hostname: str) -> str:
    '''Render the details page of a hostname (the captures are not passed to the template).'''
    # The flag is only set when the query parameter is exactly 'True'.
    from_popup = request.args.get('from_popup') == 'True'
    return render_template('hostname.html', hostname=hostname, from_popup=from_popup)
@ -1984,10 +1978,27 @@ def __prepare_node_view(capture_uuid: str, nodes: set[str]) -> str:
@app.route('/tables/<string:table_name>/<string:value>', methods=['POST'])
def post_table(table_name: str, value: str) -> Response:
from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
draw = request.form.get('draw', type=int)
start = request.form.get('start', type=int)
length = request.form.get('length', type=int)
captures: list[tuple[str, str, str, datetime, set[str]]] | list[tuple[str, str, str, datetime]]
captures: list[tuple[str, str, datetime, str, str]] | list[tuple[str, str, str, datetime, set[str]]] | list[tuple[str, str, str, datetime]]
if table_name == 'bodyHashDetailsTable':
body_hash = value.strip()
total, captures = _get_body_hash_investigator(body_hash, offset=start, limit=length)
prepared_captures = []
for capture_uuid, title, capture_time, hostnode_uuid, url in captures:
to_append = {
'capture_time': capture_time.isoformat(),
'url': f"""<span class="d-inline-block text-break" style="max-width: 400px;">{url}</span>"""
}
if from_popup:
to_append['capture_title'] = f"""<button type="button" class="btn btn-link openNewTab" data-capture="{capture_uuid}" data-hostnode="{hostnode_uuid}">{title}</button>"""
else:
to_append['capture_title'] = f"""<a href="{url_for('tree', tree_uuid=capture_uuid, node_uuid=hostnode_uuid)}">{title}</a>"""
prepared_captures.append(to_append)
return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures})
if table_name == 'identifierDetailsTable':
identifier_type, identifier = value.strip().split('|')
total, captures = get_identifier_investigator(identifier_type, identifier, offset=start, limit=length)

View File

@ -20,7 +20,7 @@
"favicon.ico": "KOmrfwRbOQqhhwSeBkNpMRAxSVMmmLg+2kRMg9iSv7OWjE9spJc7x4MKB4AE/hi0knaV7UBVctAU6XZ7AC72ZA==",
"font.png": "RwoQkj9dT9SLUL2F7cAA16Nat9t2hDb58eQlHF9ThUar829p0INUXG+5XuDaFOC8SsmCZK5vw2f+YAQ6mLC1Qw==",
"generic.css": "Sh/BcxFMLYYaLdCluVt9efGvJ9CF5d+YJ7lkL2M24PRGu8VZHI9lJiUlFObIocjQgwss3Ve2U5cUAE5WiAdpQQ==",
"generic.js": "B1Kd8EpQRDDLAxh7eD2QZNyDBwY8zyC8NBoGhml8zpF9TRsEtiH7XcXus+NMNvRtqKoo93Xhc+JTAW9Fn0lMdw==",
"generic.js": "xeXEopg1wyufE1owZH6don+1vtvsJnbM/BYC+oLOWHTtgpsGKfNhzyLuZsb3DSKtU7knIK+vwPwPJepncWM8KA==",
"hostname_popup.js": "froqRK2HEphJSU++PkPfEvaztrzH05QwZ4q2wEBAL6JDinpOCZqW9GMMV6oBhpTmyu5zfsz6ZpqrfaB0C0iIwg==",
"html.png": "T7pZrb8MMDsA/JV/51hu+TOglTqlxySuEVY0rpDjTuAEyhzk2v+W4kYrj7vX+Tp3n2d2lvVD08PwhCG62Yfbzg==",
"ifr.png": "rI5YJypmz1QcULRf9UaOYSqV4tPUSxUdLAycoYzCwywt4Pw4eWzBg9SUr769VyIimoiIyJR+aNuoIA4p5WO2fQ==",
@ -33,7 +33,7 @@
"loader.gif": "ZZKD5vLSKBWKeUpa2KI9qheUJ49iTI/UULmVU/AX28fBfH00K3lLc2v5pVJZ4qXG1BbB13LTXzRKKU35H2XfNg==",
"lookyloo.jpeg": "i6wBj8CsIM5YAQLEMQfhs3CNOSKkErF8AMqqM6ZygSwCyQgv9CU8xt94veMZhM/ufBWoz7kAXmR+yywmxsTxug==",
"redirect.png": "PAjzlPV97rEFvH55mG1ZC9wRl98be3yMeX/nENuFkJcds6/AXgSR2ig/QyPULgobSnNgiYieLVWY/oqsgeywrQ==",
"render_tables.js": "1FClG3TjTlQputaFO1Yy5lvio7ahjLg3RspaFFoEIXLNQQdsizGGU9D7T9lVOYvG4Nl09tKLzb1ud+pQMCTZaA==",
"render_tables.js": "hG5hQVtegWd8gc4HX/iFR8+YsUjLxTL/XbkbFQrombrVLQrR+yH0sGM6wq0PE/6hLbetOfrXnf91Kp1gBsm6bg==",
"secure.svg": "H8ni7t0d60nCJDVGuZpuxC+RBy/ipAjWT627D12HlZGg6LUmjSwPTQTUekm3UJupEP7TUkhXyq6WHc5gy7QBjg==",
"stats.css": "/kY943FwWBTne4IIyf7iBROSfbGd82TeBicEXqKkRwawMVRIvM/Pk5MRa7okUyGIxaDjFQGmV/U1vy+PhN6Jbw==",
"stats_graph.js": "S/sMNQK1UMMLD0xQeEa7sq3ce8o6oPxwxGlyKVtaHOODjair86dbBDm7cu6pa/elMRDJT1j09jEFjWp+5GbhTw==",

View File

@ -39,22 +39,27 @@ function downloadBase64File(contentType, base64Data, fileName) {
downloadLink.click();
}
// Attach the "open the capture in a new tab" click handler to every .openNewTab element.
// The function is called on page load and around table rendering, so it must be safe to
// call repeatedly: an element that already has the handler is flagged and skipped,
// otherwise one click would open the tree once per call.
function newTabClickListener() {
  document.querySelectorAll('.openNewTab').forEach(el => {
    if (el.dataset.newTabBound === 'true') {
      return;
    }
    el.dataset.newTabBound = 'true';
    el.addEventListener('click', event => {
      if (window.opener === null) {
        // Not in a popup: open the tree from this window.
        return openTreeInNewTab(el.dataset.capture, el.dataset.hostnode);
      }
      // In a popup: the main window opens the tab.
      let success = window.opener.openTreeInNewTab(el.dataset.capture, el.dataset.hostnode);
      if (!success) {
        alert("Your browser doesn't allow Lookyloo to open a new tab. There should be an icon on the right side of your URL bar *in the main window* to allow it.");
      }
    });
  });
}
document.addEventListener("DOMContentLoaded", () => {
document.querySelectorAll('.goBack').forEach(el => el.addEventListener('click', event => {
window.history.back();
}));
document.querySelectorAll('.openNewTab').forEach(el => el.addEventListener('click', event => {
if (window.opener === null) {
return openTreeInNewTab(el.dataset.capture, el.dataset.hostnode)
} else {
let success = window.opener.openTreeInNewTab(el.dataset.capture, el.dataset.hostnode);
if (! success) {
alert("Your browser doesn't allow Lookyloo to open a new tab. There should be an icon on the right side of your URL bar *in the main window* to allow it.");
}
}
}));
newTabClickListener();
document.querySelectorAll(".locateInTree").forEach(el => el.addEventListener('click', event => {
window.opener.LocateNode(el.dataset.hostnode);

View File

@ -1,18 +1,30 @@
["DOMContentLoaded", "shown.bs.modal", "jquery.modal.rendered"].forEach(e => window.addEventListener(e, function() {
if (document.getElementById('bodyHashDetailsTable')) {
  // Server-side table: the rows are fetched page by page from /tables/bodyHashDetailsTable/.
  const bodyhash = document.getElementById('bodyHashDetailsTable').dataset.bodyhash;
  new DataTable('#bodyHashDetailsTable', {
    processing: true,
    serverSide: true,
    retrieve: true,
    // Pass the function itself (not its result): the rows only exist after each draw,
    // and their .openNewTab buttons need the click handler at that point.
    drawCallback: newTabClickListener,
    ajax: {
      // The query string of the page is forwarded so the server sees from_popup.
      url: `/tables/bodyHashDetailsTable/${bodyhash}${window.location.search}`,
      type: 'POST'
    },
    columns : [
      { data: 'capture_time' },
      { data: 'capture_title' },
      { data: 'url' }
    ],
    order: [[ 0, "desc" ]],
    columnDefs: [{ width: '20%', targets: 0,
                   // The capture time is sent as an ISO string, displayed in the local timezone.
                   render: (data) => {
                     const date = new Date(data);
                     return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.getDate().toString().padStart(2, "0") + ' ' + date.toTimeString();
                   }
                 },
                 { width: '40%', targets: 1 },
                 { width: '40%', targets: 2 }],
  });
}
if (document.getElementById('hashTypeDetailsTable')) {
hash_value = document.getElementById('hashTypeDetailsTable').dataset.hashvalue;
@ -20,6 +32,7 @@
processing: true,
serverSide: true,
retrieve: true,
drawCallback: newTabClickListener(),
ajax: {
url: `/tables/hashTypeDetailsTable/${hash_value}`,
type: 'POST'
@ -49,6 +62,7 @@
processing: true,
serverSide: true,
retrieve: true,
drawCallback: newTabClickListener(),
ajax: {
url: `/tables/identifierDetailsTable/${identifier_value}`,
type: 'POST'
@ -75,6 +89,7 @@
if (document.getElementById('bodyHashesTable')) {
new DataTable('#bodyHashesTable', {
retrieve: true,
drawCallback: newTabClickListener(),
order: [[ 0, "desc" ]],
columnDefs: [{ width: '10%', targets: 0 },
{ width: '10%', targets: 1 },
@ -90,6 +105,7 @@
if (document.getElementById('faviconsTable')) {
new DataTable('#faviconsTable', {
retrieve: true,
drawCallback: newTabClickListener(),
columnDefs: [{ width: '10%', targets: 0 },
{ width: '40%', targets: 1 },
{ width: '40%', targets: 2 },
@ -99,6 +115,7 @@
if (document.getElementById('treeHashesTable')) {
new DataTable('#treeHashesTable', {
retrieve: true,
drawCallback: newTabClickListener(),
columnDefs: [{ width: '20%', targets: 0 },
{ width: '40%', targets: 1 },
{ width: '40%', targets: 2 }],
@ -107,6 +124,7 @@
if (document.getElementById('hostnamesTable')) {
new DataTable('#hostnamesTable', {
retrieve: true,
drawCallback: newTabClickListener(),
order: [[ 0, "desc" ]],
columnDefs: [{ width: '10%', targets: 0 },
{ width: '40%', targets: 1 },
@ -121,6 +139,7 @@
if (document.getElementById('identifiersTable')) {
new DataTable('#identifiersTable', {
retrieve: true,
drawCallback: newTabClickListener(),
columnDefs: [{ width: '20%', targets: 0 },
{ width: '40%', targets: 1 },
{ width: '40%', targets: 2 }],
@ -129,6 +148,7 @@
if (document.getElementById('urlsTable')) {
new DataTable('#urlsTable', {
retrieve: true,
drawCallback: newTabClickListener(),
order: [[ 0, "desc" ]],
columnDefs: [{ width: '10%', targets: 0 },
{ width: '90%', targets: 1 }],
@ -142,6 +162,7 @@
if (document.getElementById('cookieNameTable')) {
new DataTable('#cookieNameTable', {
retrieve: true,
drawCallback: newTabClickListener(),
order: [[ 0, "desc" ]],
columnDefs: [{ width: '30%', targets: 0,
render: (data) => {
@ -159,6 +180,7 @@
processing: true,
serverSide: true,
retrieve: true,
drawCallback: newTabClickListener(),
ajax: {
url: `/tables/hostnameTable/${hostname}`,
type: 'POST'
@ -186,6 +208,7 @@
processing: true,
serverSide: true,
retrieve: true,
drawCallback: newTabClickListener(),
ajax: {
url: `/tables/urlTable/${url}`,
type: 'POST'
@ -213,6 +236,7 @@
processing: true,
serverSide: true,
retrieve: true,
drawCallback: newTabClickListener(),
ajax: {
url: `/tables/faviconDetailsTable/${favicon}`,
type: 'POST'
@ -240,5 +264,4 @@
document.querySelectorAll(".downloadFaviconButton").forEach(el => el.addEventListener('click', event => {
downloadBase64File(el.dataset.mimetype, el.dataset.b64favicon, el.dataset.filename);
}))
}));

View File

@ -18,12 +18,10 @@
<center>
<h6>{{ body_hash }}</h6>
<h6>Only the most recent captures are listed below, this will change soon.</h6>
<a href="{{ url_for('ressource_by_hash', sha512=body_hash) }}">Download</a>
</center>
<p>The same file was seen in these captures recently:</p>
<table id="bodyHashDetailsTable" class="table table-striped" style="width:100%">
<table id="bodyHashDetailsTable" class="table table-striped" style="width:100%" data-bodyhash="{{body_hash}}">
<thead>
<tr>
<th>Capture Time</th>
@ -31,20 +29,5 @@
<th>URL</th>
</tr>
</thead>
<tbody>
{% for capture_uuid, title, timestamp, hostnode_uuid, url in captures %}
<tr>
<td>{{ timestamp }}</td>
<td>
{% if from_popup %}
<button type="button" class="btn btn-link openNewTab" data-capture="{{capture_uuid}}" data-hostnode="{{hostnode_uuid}}">{{ title }}</button>
{% else %}
<a href="{{ url_for('tree', tree_uuid=capture_uuid, node_uuid=hostnode_uuid) }}">{{ title }}</a>
{% endif %}
</td>
<td>{{ url }}</td>
</tr>
{% endfor %}
</tbody>
</table>
{% endblock %}