diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py
index a7564ea7..3fbef23c 100644
--- a/lookyloo/indexing.py
+++ b/lookyloo/indexing.py
@@ -223,21 +223,23 @@ class Indexing():
         self.logger.debug(f'done with cookies for {crawled_tree.uuid}.')
 
     def get_captures_cookies_name(self, cookie_name: str, most_recent_capture: datetime | None = None,
-                                  oldest_capture: datetime | None= None) -> list[tuple[str, float]]:
+                                  oldest_capture: datetime | None= None,
+                                  offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
         """Get all the captures for a specific cookie name, on a time interval starting from the most recent one.
 
         :param cookie_name: The cookie name
         :param most_recent_capture: The capture time of the most recent capture to consider
-        :param oldest_capture: The capture time of the oldest capture to consider, defaults to 20 days ago.
+        :param oldest_capture: The capture time of the oldest capture to consider.
         """
         max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
-        min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=20)).timestamp()
+        min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
 
         if self.redis.type(f'cookies_names|{cookie_name}|captures') == 'set':  # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_cookies', *[entry.split('|')[0] for entry in self.redis.smembers(f'cn|{cookie_name}|captures')])
             self.redis.delete(f'cookies_names|{cookie_name}|captures')
-            return []
-        return self.redis.zrevrangebyscore(f'cookies_names|{cookie_name}|captures', max_score, min_score, withscores=True)
+            return 0, []
+        total = self.redis.zcard(f'cookies_names|{cookie_name}|captures')
+        return total, self.redis.zrevrangebyscore(f'cookies_names|{cookie_name}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
 
     def get_captures_cookie_name_count(self, cookie_name: str) -> int:
         return self.redis.zcard(f'cookies_names|{cookie_name}|captures')
@@ -398,21 +400,23 @@ class Indexing():
         self.logger.debug(f'done with HHHashes for {crawled_tree.uuid}.')
 
     def get_captures_hhhash(self, hhh: str, most_recent_capture: datetime | None = None,
-                            oldest_capture: datetime | None= None) -> list[tuple[str, float]]:
+                            oldest_capture: datetime | None=None,
+                            offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
         """Get all the captures for a specific HTTP Header Hash, on a time interval starting from the most recent one.
 
         :param hhh: The HTTP Header Hash
         :param most_recent_capture: The capture time of the most recent capture to consider
-        :param oldest_capture: The capture time of the oldest capture to consider, defaults to 15 days ago.
+        :param oldest_capture: The capture time of the oldest capture to consider.
         """
         max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
-        min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=15)).timestamp()
+        min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
 
         if self.redis.type(f'hhhashes|{hhh}|captures') == 'set':  # type: ignore[no-untyped-call]
             # triggers the re-index soon.
             self.redis.srem('indexed_hhhashes', *self.redis.smembers(f'hhhashes|{hhh}|captures'))
             self.redis.delete(f'hhhashes|{hhh}|captures')
-            return []
-        return self.redis.zrevrangebyscore(f'hhhashes|{hhh}|captures', max_score, min_score, withscores=True)
+            return 0, []
+        total = self.redis.zcard(f'hhhashes|{hhh}|captures')
+        return total, self.redis.zrevrangebyscore(f'hhhashes|{hhh}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
 
     def get_captures_hhhash_count(self, hhh: str) -> int:
         return self.redis.zcard(f'hhhashes|{hhh}|captures')
@@ -422,6 +426,17 @@ class Indexing():
             return set(url_nodes)
         return set()
 
+    def get_node_for_headers(self, hhh: str) -> tuple[str, str] | None:
+        _, latest_entry = self.get_captures_hhhash(hhh, offset=0, limit=1)
+        if not latest_entry:
+            # That shouldn't happen if the hash is indexed
+            return None
+        capture_uuid, _ = latest_entry[0]
+        nodes = self.get_capture_hhhash_nodes(capture_uuid, hhh)
+        if not nodes:
+            return None
+        return capture_uuid, nodes.pop()
+
     # ###### URLs and Domains ######
 
     def _reindex_urls_domains(self, hostname: str, md5_url: str) -> None:
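Both getters now share one paginated contract: ZCARD supplies the total for the pager, and ZREVRANGEBYSCORE with redis-py's start/num (Redis' LIMIT clause) returns a single page, newest first. With the 20/15-day default windows gone, an unbounded call ('-Inf') spans the whole index, and get_node_for_headers() simply asks for page one with limit=1 to find a representative capture and node. A minimal sketch of the pattern in isolation, assuming a local Redis instance and a made-up sorted set 'demo|captures' scored by capture timestamp (not lookyloo code, just the redis-py calls):

    from redis import Redis

    r = Redis(decode_responses=True)

    def paginated(key: str, offset: int | None = None,
                  limit: int | None = None) -> tuple[int, list[tuple[str, float]]]:
        # ZCARD gives the full cardinality so the caller can size the pager.
        total = r.zcard(key)
        # start/num map to Redis' LIMIT; redis-py requires both or neither.
        entries = r.zrevrangebyscore(key, '+Inf', '-Inf', withscores=True,
                                     start=offset, num=limit)
        return total, entries

    # e.g. the first page of 25 captures, newest first:
    # total, page = paginated('demo|captures', offset=0, limit=25)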
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 21704218..21e2092d 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -415,13 +415,12 @@ def get_url_investigator(url: str, offset: int | None=None, limit: int | None=No
             ) for cache in cached_captures]
 
 
-def get_cookie_name_investigator(cookie_name: str, /) -> list[tuple[str, str, datetime, set[str]]]:
+def get_cookie_name_investigator(cookie_name: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime, set[str]]]]:
     '''Returns all the captures related to a cookie name entry, used in the web interface.'''
-    cached_captures = lookyloo.sorted_capture_cache(
-        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_cookies_name(cookie_name=cookie_name)],
-        cached_captures_only=True)
-    captures = [(cache.uuid, cache.title, cache.timestamp, get_indexing(flask_login.current_user).get_capture_cookie_name_nodes(cache.uuid, cookie_name)) for cache in cached_captures]
-    return captures
+    total, entries = get_indexing(flask_login.current_user).get_captures_cookies_name(cookie_name=cookie_name)
+    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
+    captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, get_indexing(flask_login.current_user).get_capture_cookie_name_nodes(cache.uuid, cookie_name)) for cache in cached_captures]
+    return total, captures
 
 
 def get_identifier_investigator(identifier_type: str, identifier: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
@@ -446,13 +445,11 @@ def get_favicon_investigator(favicon_sha512: str, offset: int | None=None, limit
     return total, captures
 
 
-def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
+def get_hhh_investigator(hhh: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, datetime, str, str]]]:
     '''Returns all the captures related to a cookie name entry, used in the web interface.'''
-    cached_captures = lookyloo.sorted_capture_cache(
-        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hhhash(hhh)],
-        cached_captures_only=True)
+    total, entries = get_indexing(flask_login.current_user).get_captures_hhhash(hhh, offset=offset, limit=limit)
+    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
     captures = []
-    headers: list[tuple[str, str]] = []
     for cache in cached_captures:
         if not cache:
             continue
@@ -461,11 +458,8 @@ def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]],
         try:
             urlnode = lookyloo.get_urlnode_from_tree(cache.uuid, urlnode_uuid)
         except IndexError:
             continue
-        captures.append((cache.uuid, urlnode.hostnode_uuid, urlnode.name, cache.title))
-        if not headers:
-            # Just do that once.
-            headers = [(header["name"], header["value"]) for header in urlnode.response['headers']]
-    return captures, headers
+        captures.append((cache.uuid, cache.title, cache.timestamp, urlnode.hostnode_uuid, urlnode.name))
+    return total, captures
 
 
 def hash_lookup(blob_hash: str, url: str, current_capture_uuid: str) -> tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]:
@@ -1730,15 +1724,18 @@ def simple_capture() -> str | Response | WerkzeugResponse:
 
 @app.route('/cookies/<string:cookie_name>', methods=['GET'])
 def cookies_name_detail(cookie_name: str) -> str:
     from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
-    captures = get_cookie_name_investigator(cookie_name.strip())
-    return render_template('cookie_name.html', cookie_name=cookie_name, captures=captures, from_popup=from_popup)
+    return render_template('cookie_name.html', cookie_name=cookie_name, from_popup=from_popup)
 
 
 @app.route('/hhhdetails/<string:hhh>', methods=['GET'])
 def hhh_detail(hhh: str) -> str:
     from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
-    captures, headers = get_hhh_investigator(hhh.strip())
-    return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers, from_popup=from_popup)
+    headers: list[tuple[str, str]] = []
+    if capture_node := get_indexing(flask_login.current_user).get_node_for_headers(hhh):
+        capture_uuid, node_uuid = capture_node
+        if urlnode := lookyloo.get_urlnode_from_tree(capture_uuid, node_uuid):
+            headers = [(header["name"], header["value"]) for header in urlnode.response['headers']]
+    return render_template('hhh_details.html', hhh=hhh, headers=headers, from_popup=from_popup)
 
 
 @app.route('/identifier_details/<string:identifier_type>/<string:identifier>', methods=['GET'])
@@ -1967,11 +1964,16 @@ def add_context(tree_uuid: str, node_uuid: str) -> WerkzeugResponse | None:
     return None
 
 
-def __prepare_node_view(capture_uuid: str, nodes: set[str]) -> str:
+def __prepare_node_view(capture_uuid: str, nodes: set[str], from_popup: bool=False) -> str:
     to_return = f'The capture contains this value in {len(nodes)} nodes, click below to see them on the tree:'
     to_return += '<ul>'
     for node in nodes:
-        to_return += f'<li><a href="{url_for("tree", tree_uuid=capture_uuid, node_uuid=node)}">{node}</a></li>'
+        to_return += '<li>'
+        if from_popup:
+            to_return += f"""<a href="#" class="openNewTab" data-capture="{capture_uuid}" data-hostnode="{node}">{node}</a>"""
+        else:
+            to_return += f'<a href="{url_for("tree", tree_uuid=capture_uuid, node_uuid=node)}">{node}</a>'
+        to_return += '</li>'
     to_return += '</ul>'
     return to_return
 
@@ -1983,6 +1985,40 @@ def post_table(table_name: str, value: str) -> Response:
     start = request.form.get('start', type=int)
     length = request.form.get('length', type=int)
     captures: list[tuple[str, str, datetime, str, str]] | list[tuple[str, str, str, datetime, set[str]]] | list[tuple[str, str, str, datetime]]
+    if table_name == 'HHHDetailsTable':
+        hhh = value.strip()
+        total, captures = get_hhh_investigator(hhh, offset=start, limit=length)
+        prepared_captures = []
+        for capture_uuid, title, capture_time, hostnode_uuid, url in captures:
+            to_append = {
+                'capture_time': capture_time.isoformat(),
+                'url': f"""{url}"""
+            }
+            if from_popup:
+                to_append['capture_title'] = f"""<a href="#" class="openNewTab" data-capture="{capture_uuid}"> {title}</a>"""
+            else:
+                to_append['capture_title'] = f"""<a href="{url_for('tree', tree_uuid=capture_uuid)}">{title}</a>"""
+            prepared_captures.append(to_append)
+        return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures})
+
+    if table_name == 'cookieNameTable':
+        cookie_name = value.strip()
+        total, captures = get_cookie_name_investigator(cookie_name, offset=start, limit=length)
+        prepared_captures = []
+        for capture_uuid, title, landing_page, capture_time, nodes in captures:
+            _nodes = __prepare_node_view(capture_uuid, nodes, from_popup)
+            to_append = {
+                'capture_time': capture_time.isoformat(),
+                'landing_page': f"""{landing_page}"""
+            }
+            if from_popup:
+                to_append['capture_title'] = f"""<a href="#" class="openNewTab" data-capture="{capture_uuid}">{title}</a>"""
+            else:
+                to_append['capture_title'] = f"""<a href="{url_for('tree', tree_uuid=capture_uuid)}">{title}</a>"""
+            to_append['capture_title'] += f'</br>{_nodes}'
+            prepared_captures.append(to_append)
+        return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures})
+
     if table_name == 'bodyHashDetailsTable':
         body_hash = value.strip()
         total, captures = _get_body_hash_investigator(body_hash, offset=start, limit=length)
@@ -1993,7 +2029,7 @@ def post_table(table_name: str, value: str) -> Response:
                 'url': f"""{url}"""
             }
             if from_popup:
-                to_append['capture_title'] = f"""<a href="#" class="openNewTab" data-capture="{capture_uuid}"></a>"""
+                to_append['capture_title'] = f"""<a href="#" class="openNewTab" data-capture="{capture_uuid}"> {title}</a>"""
             else:
                 to_append['capture_title'] = f"""<a href="{url_for('tree', tree_uuid=capture_uuid)}">{title}</a>"""
             prepared_captures.append(to_append)
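The two new post_table branches speak DataTables' server-side protocol: the client POSTs draw, start and length; the server echoes draw back and answers with recordsTotal/recordsFiltered plus one page of rows keyed by the column names the JS declares. A stripped-down sketch of that contract, with a made-up demoTable route and an in-memory list standing in for the paginated investigator lookups:

    from flask import Flask, Response, jsonify, request

    app = Flask(__name__)

    # Stand-in for the (total, captures) results of an investigator call.
    demo_rows = [{'capture_time': f'2024-01-{day:02}T00:00:00',
                  'capture_title': f'capture {day}',
                  'landing_page': f'https://example.com/{day}'}
                 for day in range(1, 31)]

    @app.route('/tables/demoTable/<value>', methods=['POST'])
    def demo_table(value: str) -> Response:
        draw = request.form.get('draw', type=int)  # echoed back so DataTables can pair request and response
        start = request.form.get('start', default=0, type=int)  # page offset
        length = request.form.get('length', default=10, type=int)  # page size
        return jsonify({'draw': draw,
                        'recordsTotal': len(demo_rows),
                        'recordsFiltered': len(demo_rows),
                        'data': demo_rows[start:start + length]})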
{_nodes}' + prepared_captures.append(to_append) + return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures}) + if table_name == 'bodyHashDetailsTable': body_hash = value.strip() total, captures = _get_body_hash_investigator(body_hash, offset=start, limit=length) @@ -1993,7 +2029,7 @@ def post_table(table_name: str, value: str) -> Response: 'url': f"""{url}""" } if from_popup: - to_append['capture_title'] = f"""""" + to_append['capture_title'] = f""" {title}""" else: to_append['capture_title'] = f"""{title}""" prepared_captures.append(to_append) diff --git a/website/web/sri.txt b/website/web/sri.txt index 143dcff6..a682c9d3 100644 --- a/website/web/sri.txt +++ b/website/web/sri.txt @@ -33,7 +33,7 @@ "loader.gif": "ZZKD5vLSKBWKeUpa2KI9qheUJ49iTI/UULmVU/AX28fBfH00K3lLc2v5pVJZ4qXG1BbB13LTXzRKKU35H2XfNg==", "lookyloo.jpeg": "i6wBj8CsIM5YAQLEMQfhs3CNOSKkErF8AMqqM6ZygSwCyQgv9CU8xt94veMZhM/ufBWoz7kAXmR+yywmxsTxug==", "redirect.png": "PAjzlPV97rEFvH55mG1ZC9wRl98be3yMeX/nENuFkJcds6/AXgSR2ig/QyPULgobSnNgiYieLVWY/oqsgeywrQ==", - "render_tables.js": "hG5hQVtegWd8gc4HX/iFR8+YsUjLxTL/XbkbFQrombrVLQrR+yH0sGM6wq0PE/6hLbetOfrXnf91Kp1gBsm6bg==", + "render_tables.js": "SzrpJYC5mvecw2s/hm8Sh+mDnDR7Ygqzw2f2m3ysSd0crwfDf71jvXvynDbbL7ECVDK4TsAd7kdzz+lUNQLYqA==", "secure.svg": "H8ni7t0d60nCJDVGuZpuxC+RBy/ipAjWT627D12HlZGg6LUmjSwPTQTUekm3UJupEP7TUkhXyq6WHc5gy7QBjg==", "stats.css": "/kY943FwWBTne4IIyf7iBROSfbGd82TeBicEXqKkRwawMVRIvM/Pk5MRa7okUyGIxaDjFQGmV/U1vy+PhN6Jbw==", "stats_graph.js": "S/sMNQK1UMMLD0xQeEa7sq3ce8o6oPxwxGlyKVtaHOODjair86dbBDm7cu6pa/elMRDJT1j09jEFjWp+5GbhTw==", diff --git a/website/web/static/render_tables.js b/website/web/static/render_tables.js index 5bd19b7a..4c4cc94d 100644 --- a/website/web/static/render_tables.js +++ b/website/web/static/render_tables.js @@ -1,11 +1,38 @@ ["DOMContentLoaded", "shown.bs.modal", "jquery.modal.rendered"].forEach(e => window.addEventListener(e, function() { + if (document.getElementById('HHHDetailsTable')) { + hhh = document.getElementById('HHHDetailsTable').dataset.hhh; + new DataTable('#HHHDetailsTable', { + processing: true, + serverSide: true, + retrieve: true, + drawCallback: function (settings) { newTabClickListener() }, + ajax: { + url: `/tables/HHHDetailsTable/${hhh}${window.location.search}`, + type: 'POST' + }, + columns : [ + { data: 'capture_time' }, + { data: 'capture_title' }, + { data: 'url' } + ], + order: [[ 0, "desc" ]], + columnDefs: [{ width: '20%', targets: 0, + render: (data) => { + const date = new Date(data); + return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.getDate().toString().padStart(2, "0") + ' ' + date.toTimeString(); + } + }, + { width: '40%', targets: 1 }, + { width: '40%', targets: 2 }], + }) + } if (document.getElementById('bodyHashDetailsTable')) { bodyhash = document.getElementById('bodyHashDetailsTable').dataset.bodyhash; new DataTable('#bodyHashDetailsTable', { processing: true, serverSide: true, retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, ajax: { url: `/tables/bodyHashDetailsTable/${bodyhash}${window.location.search}`, type: 'POST' @@ -32,7 +59,7 @@ processing: true, serverSide: true, retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, ajax: { url: `/tables/hashTypeDetailsTable/${hash_value}`, type: 'POST' @@ -62,7 +89,7 @@ processing: true, serverSide: true, retrieve: true, - drawCallback: 
newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, ajax: { url: `/tables/identifierDetailsTable/${identifier_value}`, type: 'POST' @@ -89,7 +116,7 @@ if (document.getElementById('bodyHashesTable')) { new DataTable('#bodyHashesTable', { retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, order: [[ 0, "desc" ]], columnDefs: [{ width: '10%', targets: 0 }, { width: '10%', targets: 1 }, @@ -105,7 +132,7 @@ if (document.getElementById('faviconsTable')) { new DataTable('#faviconsTable', { retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, columnDefs: [{ width: '10%', targets: 0 }, { width: '40%', targets: 1 }, { width: '40%', targets: 2 }, @@ -115,7 +142,7 @@ if (document.getElementById('treeHashesTable')) { new DataTable('#treeHashesTable', { retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, columnDefs: [{ width: '20%', targets: 0 }, { width: '40%', targets: 1 }, { width: '40%', targets: 2 }], @@ -124,7 +151,7 @@ if (document.getElementById('hostnamesTable')) { new DataTable('#hostnamesTable', { retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, order: [[ 0, "desc" ]], columnDefs: [{ width: '10%', targets: 0 }, { width: '40%', targets: 1 }, @@ -139,7 +166,7 @@ if (document.getElementById('identifiersTable')) { new DataTable('#identifiersTable', { retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, columnDefs: [{ width: '20%', targets: 0 }, { width: '40%', targets: 1 }, { width: '40%', targets: 2 }], @@ -148,7 +175,7 @@ if (document.getElementById('urlsTable')) { new DataTable('#urlsTable', { retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, order: [[ 0, "desc" ]], columnDefs: [{ width: '10%', targets: 0 }, { width: '90%', targets: 1 }], @@ -160,17 +187,30 @@ }); } if (document.getElementById('cookieNameTable')) { + cookieName = document.getElementById('cookieNameTable').dataset.cookiename; new DataTable('#cookieNameTable', { + processing: true, + serverSide: true, retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, + ajax: { + url: `/tables/cookieNameTable/${cookieName}${window.location.search}`, + type: 'POST' + }, + columns : [ + { data: 'capture_time' }, + { data: 'capture_title' }, + { data: 'landing_page' } + ], order: [[ 0, "desc" ]], - columnDefs: [{ width: '30%', targets: 0, + columnDefs: [{ width: '20%', targets: 0, render: (data) => { const date = new Date(data); return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.getDate().toString().padStart(2, "0") + ' ' + date.toTimeString(); } }, - { width: '70%', targets: 1 }] + { width: '40%', targets: 1 }, + { width: '40%', targets: 2 }] }); } @@ -180,7 +220,7 @@ processing: true, serverSide: true, retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, ajax: { url: `/tables/hostnameTable/${hostname}`, type: 'POST' @@ -208,7 +248,7 @@ processing: true, serverSide: true, retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, ajax: { url: `/tables/urlTable/${url}`, type: 'POST' @@ 
-236,7 +276,7 @@ processing: true, serverSide: true, retrieve: true, - drawCallback: newTabClickListener(), + drawCallback: function (settings) { newTabClickListener() }, ajax: { url: `/tables/faviconDetailsTable/${favicon}`, type: 'POST' diff --git a/website/web/templates/cookie_name.html b/website/web/templates/cookie_name.html index edbda8fb..4d4b087e 100644 --- a/website/web/templates/cookie_name.html +++ b/website/web/templates/cookie_name.html @@ -19,38 +19,15 @@
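The recurring drawCallback change fixes a real bug: drawCallback: newTabClickListener() invoked the listener once at initialisation and handed DataTables its return value (undefined), so rows rendered by later draws never got their open-in-new-tab handlers; the wrapped function re-binds them after every redraw. On the wire, each ajax block amounts to a POST like the one below, simulated with requests against a hypothetical instance and an illustrative cookie name (from_popup rides along in the query string, as window.location.search does in the JS):

    import requests

    # draw/start/length are the form fields post_table() reads.
    resp = requests.post('https://lookyloo.example/tables/cookieNameTable/_ga',
                         data={'draw': 1, 'start': 0, 'length': 25})
    body = resp.json()
    # 'recordsTotal' sizes the pager; 'data' holds one page of rows keyed by the
    # column names declared in the JS (capture_time, capture_title, landing_page).
    print(body['recordsTotal'], len(body['data']))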

diff --git a/website/web/templates/cookie_name.html b/website/web/templates/cookie_name.html
index edbda8fb..4d4b087e 100644
--- a/website/web/templates/cookie_name.html
+++ b/website/web/templates/cookie_name.html
@@ -19,38 +19,15 @@
 <center>
   <h4>{{ cookie_name }}</h4>
 </center>
 
-Only the most recent captures are listed below, this will change soon.
-<table id="cookieNameTable" class="table table-striped" style="width:100%">
+<table id="cookieNameTable" class="table table-striped" style="width:100%" data-cookiename="{{cookie_name}}">
   <thead>
     <tr>
       <th>Capture Time</th>
       <th>Capture Title</th>
+      <th>Landing Page</th>
     </tr>
   </thead>
-  <tbody>
-  {% for capture_uuid, title, capture_time, nodes in captures %}
-    <tr>
-      <td>
-        {{capture_time}}
-      </td>
-      <td>
-        <a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">{{ title }}</a>
-        </br>
-        The capture contains this URL in {{ nodes|length }} nodes, click below to see them on the tree:
-        <ul>
-        {% for node in nodes %}
-          <li>
-            <a href="{{ url_for('tree', tree_uuid=capture_uuid, node_uuid=node) }}">{{ node }}</a>
-          </li>
-        {% endfor %}
-        </ul>
-      </td>
-    </tr>
-  {% endfor %}
-  </tbody>
 </table>
 {% endblock %}
diff --git a/website/web/templates/hhh_details.html b/website/web/templates/hhh_details.html
index 2ada53de..72f360fd 100644
--- a/website/web/templates/hhh_details.html
+++ b/website/web/templates/hhh_details.html
@@ -32,27 +32,14 @@

 The same HTTP Headers Hash was seen in these captures:
-<table id="HHHDetailsTable" class="table table-striped" style="width:100%">
+<table id="HHHDetailsTable" class="table table-striped" style="width:100%" data-hhh="{{hhh}}">
   <thead>
     <tr>
+      <th>Capture Time</th>
       <th>Capture Title</th>
-      <th>URL matching the HHH</th>
+      <th>URL</th>
     </tr>
   </thead>
-  <tbody>
-  {% for capture_uuid, hostnode_uuid, url, title in captures %}
-    <tr>
-      <td>
-        {% if from_popup %}
-        <a href="#" class="openNewTab" data-capture="{{capture_uuid}}" data-hostnode="{{hostnode_uuid}}">{{ title }}</a>
-        {% else %}
-        <a href="{{ url_for('tree', tree_uuid=capture_uuid, node_uuid=hostnode_uuid) }}">{{ title }}</a>
-        {% endif %}
-      </td>
-      <td>{{url}}</td>
-    </tr>
-  {% endfor %}
-  </tbody>
 </table>
diff --git a/website/web/templates/macros.html b/website/web/templates/macros.html
index 6b2dbdd1..6d8f69cb 100644
--- a/website/web/templates/macros.html
+++ b/website/web/templates/macros.html
@@ -283,12 +283,12 @@
 {% for detail in details %}
   {% if detail|length == 1 %}
   <li>
-    {{ detail[0] }}:
+    {{ detail[0] }}: {{ cookie_name_value[0] }}={{ shorten_string(cookie_name_value[1], 200) }}
   </li>
   {% else %}
   <li>
-    {{ detail[0] }}:
+    {{ detail[0] }}: {{ cookie_name_value[0] }}={{ shorten_string(cookie_name_value[1], 200) }}
-
     {{ button_text }}