From e0faad5490fa0def1511a681401a77a87dc6f3de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= <raphael@vinot.info>
Date: Thu, 28 Nov 2024 14:22:45 +0100
Subject: [PATCH] new: Paginate identifiers

---
 lookyloo/indexing.py                          | 12 ++++----
 website/web/__init__.py                       | 30 ++++++++++++-------
 website/web/sri.txt                           |  2 +-
 website/web/static/render_tables.js           | 13 ++++++++
 website/web/templates/identifier_details.html | 21 +------------
 5 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py
index bd1bd8ab..147b3f24 100644
--- a/lookyloo/indexing.py
+++ b/lookyloo/indexing.py
@@ -852,23 +852,25 @@ class Indexing():
 
     def get_captures_identifier(self, identifier_type: str, identifier: str,
                                 most_recent_capture: datetime | None=None,
-                                oldest_capture: datetime | None=None) -> list[tuple[str, float]]:
+                                oldest_capture: datetime | None=None,
+                                offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
         """Get all the captures for a specific identifier of a specific type,
         on a time interval starting from the most recent one.
 
         :param identifier_type: The type of identifier
         :param identifier: The identifier
         :param most_recent_capture: The capture time of the most recent capture to consider
-        :param oldest_capture: The capture time of the oldest capture to consider, defaults to 30 days ago.
+        :param oldest_capture: The capture time of the oldest capture to consider.
         """
         max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
-        min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=30)).timestamp()
+        min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
         if self.redis.type(f'identifiers|{identifier_type}|{identifier}|captures') == 'set':  # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_identifiers', *self.redis.smembers(f'identifiers|{identifier_type}|{identifier}|captures'))
             self.redis.delete(f'identifiers|{identifier_type}|{identifier}|captures')
-            return []
-        return self.redis.zrevrangebyscore(f'identifiers|{identifier_type}|{identifier}|captures', max_score, min_score, withscores=True)
+            return 0, []
+        total = self.redis.zcount(f'identifiers|{identifier_type}|{identifier}|captures', min_score, max_score)  # count only the requested time window, so the total matches the rows that can actually be paged through
+        return total, self.redis.zrevrangebyscore(f'identifiers|{identifier_type}|{identifier}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
 
     def get_captures_identifier_count(self, identifier_type: str, identifier: str) -> int:
         return self.redis.zcard(f'identifiers|{identifier_type}|{identifier}|captures')
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 1cc3cfbe..f9ae6cc1 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -429,9 +429,11 @@ def get_cookie_name_investigator(cookie_name: str, /) -> list[tuple[str, str, da
     return captures
 
 
-def get_identifier_investigator(identifier_type: str, identifier: str) -> list[tuple[str, str, str, datetime]]:
-    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_identifier(identifier_type=identifier_type, identifier=identifier)])
-    return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
+def get_identifier_investigator(identifier_type: str, identifier: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
+    total, entries = get_indexing(flask_login.current_user).get_captures_identifier(identifier_type=identifier_type, identifier=identifier, offset=offset, limit=limit)  # entries: (uuid, score) pairs for the requested offset/limit slice only
+    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])  # scores are dropped; only the UUIDs are passed on
+    captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]  # (uuid, title, last redirect, capture time)
+    return total, captures  # total is the count reported by the index, not len(captures)
 
 
 def get_capture_hash_investigator(hash_type: str, h: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
@@ -1745,18 +1747,13 @@ def hhh_detail(hhh: str) -> str:
 
 @app.route('/identifier_details/<string:identifier_type>/<string:identifier>', methods=['GET'])
 def identifier_details(identifier_type: str, identifier: str) -> str:
-    captures = get_identifier_investigator(identifier_type, identifier)
     return render_template('identifier_details.html', identifier_type=identifier_type,
-                           identifier=identifier,
-                           captures=captures)
+                           identifier=identifier)  # no captures in the context: the table rows are fetched by the page through a POST to /tables/identifierDetailsTable/...
 
 
 @app.route('/capture_hash_details/<string:hash_type>/<string:h>', methods=['GET'])
 def capture_hash_details(hash_type: str, h: str) -> str:
-    captures = get_capture_hash_investigator(hash_type, h)
-    return render_template('hash_type_details.html', hash_type=hash_type,
-                           h=h,
-                           captures=captures)
+    return render_template('hash_type_details.html', hash_type=hash_type, h=h)  # no captures in the context: rows are served by post_table under the 'hashTypeDetailsTable' table name
 
 
 @app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
@@ -1991,6 +1988,19 @@ def post_table(table_name: str, value: str) -> Response:
     start = request.form.get('start', type=int)
     length = request.form.get('length', type=int)
     captures: list[tuple[str, str, str, datetime, set[str]]] | list[tuple[str, str, str, datetime]]
+    if table_name == 'identifierDetailsTable':
+        from html import escape as html_escape  # stdlib, imported locally: title and landing page come from captured pages and must not reach the browser as raw HTML
+        identifier_type, identifier = value.strip().split('|', 1)  # maxsplit=1: value is "<type>|<identifier>" and the identifier itself may contain '|'
+        total, captures = get_identifier_investigator(identifier_type, identifier, offset=start, limit=length)
+        prepared_captures = []
+        for capture_uuid, title, landing_page, capture_time in captures:
+            prepared_captures.append({
+                'capture_time': capture_time.isoformat(),
+                'capture_title': f"""<a href="{url_for('tree', tree_uuid=capture_uuid)}">{html_escape(str(title))}</a>""",
+                'landing_page': f"""<span class="d-inline-block text-break" style="max-width: 400px;">{html_escape(str(landing_page))}</span>""",
+            })
+        return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures})
+
     if table_name == 'hashTypeDetailsTable':
         hash_type, h = value.strip().split('|')
         total, captures = get_capture_hash_investigator(hash_type, h, offset=start, limit=length)
diff --git a/website/web/sri.txt b/website/web/sri.txt
index 4027cdbd..c6207a4d 100644
--- a/website/web/sri.txt
+++ b/website/web/sri.txt
@@ -33,7 +33,7 @@
     "loader.gif": "ZZKD5vLSKBWKeUpa2KI9qheUJ49iTI/UULmVU/AX28fBfH00K3lLc2v5pVJZ4qXG1BbB13LTXzRKKU35H2XfNg==",
     "lookyloo.jpeg": "i6wBj8CsIM5YAQLEMQfhs3CNOSKkErF8AMqqM6ZygSwCyQgv9CU8xt94veMZhM/ufBWoz7kAXmR+yywmxsTxug==",
     "redirect.png": "PAjzlPV97rEFvH55mG1ZC9wRl98be3yMeX/nENuFkJcds6/AXgSR2ig/QyPULgobSnNgiYieLVWY/oqsgeywrQ==",
-    "render_tables.js": "9+CqbiqwG3SeoIwa/nRPP9RiTZBZU0BNqmXAO5A847rd0gNTpZaB15yDqj0gGG7HZVq/ckv53wN3jfo+Kk1rTw==",
+    "render_tables.js": "1FClG3TjTlQputaFO1Yy5lvio7ahjLg3RspaFFoEIXLNQQdsizGGU9D7T9lVOYvG4Nl09tKLzb1ud+pQMCTZaA==",
     "secure.svg": "H8ni7t0d60nCJDVGuZpuxC+RBy/ipAjWT627D12HlZGg6LUmjSwPTQTUekm3UJupEP7TUkhXyq6WHc5gy7QBjg==",
     "stats.css": "/kY943FwWBTne4IIyf7iBROSfbGd82TeBicEXqKkRwawMVRIvM/Pk5MRa7okUyGIxaDjFQGmV/U1vy+PhN6Jbw==",
     "stats_graph.js": "S/sMNQK1UMMLD0xQeEa7sq3ce8o6oPxwxGlyKVtaHOODjair86dbBDm7cu6pa/elMRDJT1j09jEFjWp+5GbhTw==",
diff --git a/website/web/static/render_tables.js b/website/web/static/render_tables.js
index a0bc5d15..a61f386c 100644
--- a/website/web/static/render_tables.js
+++ b/website/web/static/render_tables.js
@@ -44,7 +44,20 @@
   }
 
   if (document.getElementById('identifierDetailsTable')) {
+      identifier_value = document.getElementById('identifierDetailsTable').dataset.identifier;
       new DataTable('#identifierDetailsTable', {
+        processing: true,
+        serverSide: true,
+        retrieve: true,
+        ajax: {
+            url: `/tables/identifierDetailsTable/${identifier_value}`,
+            type: 'POST'
+        },
+        columns : [
+            { data: 'capture_time' },
+            { data: 'capture_title' },
+            { data: 'landing_page' }
+        ],
         retrieve: true,
         order: [[ 0, "desc" ]],
         columnDefs: [{ width: '30%',
diff --git a/website/web/templates/identifier_details.html b/website/web/templates/identifier_details.html
index aaf94177..afea7abb 100644
--- a/website/web/templates/identifier_details.html
+++ b/website/web/templates/identifier_details.html
@@ -3,7 +3,7 @@
 <center>
   <h5>{{identifier_type}}: {{identifier}}</h5>
 </center>
-<table id="identifierDetailsTable" class="table table-striped" style="width:100%">
+<table id="identifierDetailsTable" class="table table-striped" style="width:100%" data-identifier="{{identifier_type}}|{{identifier}}">
   <thead>
     <tr>
       <th>Capture Time</th>
@@ -11,23 +11,4 @@
       <th>Landing page</th>
     </tr>
   </thead>
-  <tbody>
-    {% for capture_uuid, title, landing_page, capture_time in captures %}
-    <tr>
-      <td>
-        {{capture_time}}
-      </td>
-      <td>
-        <a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">
-          {{ title }}
-        </a>
-      </td>
-      <td>
-        <span class="d-inline-block text-break" style="max-width: 400px;">
-          {{ landing_page }}
-        </span>
-      </td>
-    </tr>
-    {% endfor %}
-  </tbody>
 </table>