From 41287c2cebd5300e5cd4a7cdfdd9ff2cf2b00f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 26 Nov 2024 20:42:30 +0100 Subject: [PATCH] new: Paginate hostname table --- lookyloo/indexing.py | 12 ++++---- website/web/__init__.py | 44 ++++++++++++++++++++++++----- website/web/genericapi.py | 3 +- website/web/sri.txt | 2 +- website/web/static/render_tables.js | 13 ++++++++- website/web/templates/hostname.html | 32 +-------------------- 6 files changed, 60 insertions(+), 46 deletions(-) diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py index cc2a5a91..992d6b97 100644 --- a/lookyloo/indexing.py +++ b/lookyloo/indexing.py @@ -512,21 +512,23 @@ class Indexing(): return self.redis.zcard(f'urls|{md5}|captures') def get_captures_hostname(self, hostname: str, most_recent_capture: datetime | None = None, - oldest_capture: datetime | None= None) -> list[tuple[str, float]]: + oldest_capture: datetime | None= None, + offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]: """Get all the captures for a specific hostname, on a time interval starting from the most recent one. :param url: The URL :param most_recent_capture: The capture time of the most recent capture to consider - :param oldest_capture: The capture time of the oldest capture to consider, defaults to 15 days ago. + :param oldest_capture: The capture time of the oldest capture to consider. """ max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf' - min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=15)).timestamp() + min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf' if self.redis.type(f'hostnames|{hostname}|captures') == 'set': # type: ignore[no-untyped-call] # triggers the re-index soon. self.redis.srem('indexed_urls', *self.redis.smembers(f'hostnames|{hostname}|captures')) self.redis.delete(f'hostnames|{hostname}|captures') - return [] - return self.redis.zrevrangebyscore(f'hostnames|{hostname}|captures', max_score, min_score, withscores=True) + return 0, [] + total = self.redis.zcard(f'hostnames|{hostname}|captures') + return total, self.redis.zrevrangebyscore(f'hostnames|{hostname}|captures', max_score, min_score, withscores=True, start=offset, num=limit) def get_captures_hostname_count(self, hostname: str) -> int: if self.redis.type(f'hostnames|{hostname}|captures') == 'set': # type: ignore[no-untyped-call] diff --git a/website/web/__init__.py b/website/web/__init__.py index 06a585de..3c42ba1f 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -398,14 +398,15 @@ def get_all_urls(capture_uuid: str, /) -> dict[str, dict[str, int | list[URLNode return to_return -def get_hostname_investigator(hostname: str) -> list[tuple[str, str, str, datetime, set[str]]]: +def get_hostname_investigator(hostname: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime, set[str]]]]: '''Returns all the captures loading content from that hostname, used in the web interface.''' + total, entries = get_indexing(flask_login.current_user).get_captures_hostname(hostname=hostname, offset=offset, limit=limit) cached_captures = lookyloo.sorted_capture_cache( - [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hostname(hostname=hostname)], + [uuid for uuid, _ in entries], cached_captures_only=True) - return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, - get_indexing(flask_login.current_user).get_capture_hostname_nodes(cache.uuid, hostname) - ) for cache in cached_captures] + return total, [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, + get_indexing(flask_login.current_user).get_capture_hostname_nodes(cache.uuid, hostname) + ) for cache in cached_captures] def get_url_investigator(url: str) -> list[tuple[str, str, str, datetime, set[str]]]: @@ -1797,8 +1798,8 @@ def url_details(url: str) -> str: @app.route('/hostnames/', methods=['GET']) def hostname_details(hostname: str) -> str: from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False - captures = get_hostname_investigator(hostname.strip()) - return render_template('hostname.html', hostname=hostname, captures=captures, from_popup=from_popup) + # captures = get_hostname_investigator(hostname.strip()) + return render_template('hostname.html', hostname=hostname, from_popup=from_popup) @app.route('/stats', methods=['GET']) @@ -1977,6 +1978,35 @@ def add_context(tree_uuid: str, node_uuid: str) -> WerkzeugResponse | None: return None +def __prepare_node_view(capture_uuid: str, nodes: set[str]) -> str: + to_return = f'The capture contains this hostname in {len(nodes)} nodes, click below to see them on the tree:' + to_return += '' + return to_return + + +@app.route('/tables//', methods=['POST']) +def post_table(table_name: str, value: str) -> Response: + draw = request.form.get('draw', type=int) + start = request.form.get('start', type=int) + length = request.form.get('length', type=int) + if table_name == 'hostnameTable': + total, captures = get_hostname_investigator(value.strip(), offset=start, limit=length) + prepared_captures = [] + for capture_uuid, title, landing_page, capture_time, nodes in captures: + _nodes = __prepare_node_view(capture_uuid, nodes) + to_append = { + 'capture_time': capture_time.isoformat(), + 'capture_title': f"""{title}
{_nodes}""", + 'landing_page': f"""{landing_page}""" + } + prepared_captures.append(to_append) + return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures}) + return jsonify({}) + + # Query API authorizations = { 'apikey': { diff --git a/website/web/genericapi.py b/website/web/genericapi.py index 879b7bce..c9ffa1d4 100644 --- a/website/web/genericapi.py +++ b/website/web/genericapi.py @@ -380,8 +380,9 @@ class URLInfo(Resource): # type: ignore[misc] def get_hostname_occurrences(hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]: '''Get the most recent captures and URL nodes where the hostname has been seen.''' + _, entries = get_indexing(flask_login.current_user).get_captures_hostname(hostname, offset=0, limit=limit) captures = lookyloo.sorted_capture_cache( - [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hostname(hostname)], + [uuid for uuid, _ in entries], cached_captures_only=cached_captures_only) to_return: list[dict[str, Any]] = [] diff --git a/website/web/sri.txt b/website/web/sri.txt index 61577b50..149eeb23 100644 --- a/website/web/sri.txt +++ b/website/web/sri.txt @@ -33,7 +33,7 @@ "loader.gif": "ZZKD5vLSKBWKeUpa2KI9qheUJ49iTI/UULmVU/AX28fBfH00K3lLc2v5pVJZ4qXG1BbB13LTXzRKKU35H2XfNg==", "lookyloo.jpeg": "i6wBj8CsIM5YAQLEMQfhs3CNOSKkErF8AMqqM6ZygSwCyQgv9CU8xt94veMZhM/ufBWoz7kAXmR+yywmxsTxug==", "redirect.png": "PAjzlPV97rEFvH55mG1ZC9wRl98be3yMeX/nENuFkJcds6/AXgSR2ig/QyPULgobSnNgiYieLVWY/oqsgeywrQ==", - "render_tables.js": "Lpte5N1uaEGRFWyn8DlriXQUS51LEMBTj9Ux8CcdkTcg70LuuTo3RPdBrC2b63U/sdaOR9RuDFo9Z4UELK2GTw==", + "render_tables.js": "Qv7Mlmqz5f5eu2YUfKPGw8wcJ+F4Iu9MoPaFpDYBTWtGUbszYVbdeiHoL9RYqdGrkWosc3Z8R+C0aIHF11UX1A==", "secure.svg": "H8ni7t0d60nCJDVGuZpuxC+RBy/ipAjWT627D12HlZGg6LUmjSwPTQTUekm3UJupEP7TUkhXyq6WHc5gy7QBjg==", "stats.css": "/kY943FwWBTne4IIyf7iBROSfbGd82TeBicEXqKkRwawMVRIvM/Pk5MRa7okUyGIxaDjFQGmV/U1vy+PhN6Jbw==", "stats_graph.js": "S/sMNQK1UMMLD0xQeEa7sq3ce8o6oPxwxGlyKVtaHOODjair86dbBDm7cu6pa/elMRDJT1j09jEFjWp+5GbhTw==", diff --git a/website/web/static/render_tables.js b/website/web/static/render_tables.js index 4cc2bd7a..aaac7e31 100644 --- a/website/web/static/render_tables.js +++ b/website/web/static/render_tables.js @@ -129,8 +129,19 @@ } if (document.getElementById('hostnameTable')) { + hostname = document.getElementById('hostnameTable').dataset.hostname; new DataTable('#hostnameTable', { - retrieve: true, + processing: true, + serverSide: true, + ajax: { + url: `/tables/hostnameTable/${hostname}`, + type: 'POST' + }, + columns : [ + { data: 'capture_time' }, + { data: 'capture_title' }, + { data: 'landing_page' } + ], order: [[ 0, "desc" ]], columnDefs: [{ width: '20%', targets: 0, render: (data) => { diff --git a/website/web/templates/hostname.html b/website/web/templates/hostname.html index 857f7ea8..7050b6b1 100644 --- a/website/web/templates/hostname.html +++ b/website/web/templates/hostname.html @@ -20,7 +20,7 @@
Only the most recent captures are listed below, this will change soon.
- +
@@ -28,35 +28,5 @@ - - {% for capture_uuid, title, landing_page, capture_time, nodes in captures %} - - - - - - {% endfor %} -
Capture TimeLanding page
- {{capture_time}} - - - {{ title }} - -
- The capture contains this hostname in {{ nodes|length }} nodes, click below to see them on the tree: - -
- - {{ landing_page }} - -
{% endblock %}