new: Paginate domhash

Branch: pull/1005/head
Author: Raphaël Vinot
Date:   2024-11-28 13:54:32 +01:00
Parent: 18da251589
Commit: 422151a215

6 changed files with 42 additions and 34 deletions

@@ -770,17 +770,19 @@ class Indexing():
         return to_return
 
     def get_captures_hash_type(self, hash_type: str, h: str, most_recent_capture: datetime | None = None,
-                               oldest_capture: datetime | None= None) -> list[tuple[str, float]]:
+                               oldest_capture: datetime | None= None,
+                               offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
         """Get all the captures for a hash of a specific type, on a time interval starting from the most recent one.
 
         :param hash_type: The type of hash
         :param h: The hash
         :param most_recent_capture: The capture time of the most recent capture to consider
-        :param oldest_capture: The capture time of the oldest capture to consider, defaults to 20 days ago.
+        :param oldest_capture: The capture time of the oldest capture to consider.
         """
         max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
-        min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=20)).timestamp()
-        return self.redis.zrevrangebyscore(f'capture_hash_types|{hash_type}|{h}|captures', max_score, min_score, withscores=True)
+        min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
+        total = self.redis.zcard(f'capture_hash_types|{hash_type}|{h}|captures')
+        return total, self.redis.zrevrangebyscore(f'capture_hash_types|{hash_type}|{h}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
 
     def get_captures_hash_type_count(self, hash_type: str, h: str) -> int:
         if hash_type == 'certpl_html_structure_hash':
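The pagination itself is delegated to Redis here: ZCARD supplies the total row count (needed for DataTables further down), while the start/num arguments of ZREVRANGEBYSCORE map to Redis' LIMIT offset count and slice one page out of the sorted set, newest first. Note that redis-py requires start and num to be given together or not at all, which the new offset/limit defaults of None satisfy. A minimal sketch of the pattern with redis-py, against a hypothetical key and members:

# Sketch only: key name and members are made up, the pattern matches the diff above.
from datetime import datetime, timedelta
from redis import Redis

r = Redis(decode_responses=True)
key = 'capture_hash_types|domhash|deadbeef|captures'  # hypothetical hash type and value
now = datetime.now()
# Index ten fake captures, scored by capture time.
r.zadd(key, {f'capture-{i}': (now - timedelta(hours=i)).timestamp() for i in range(10)})

total = r.zcard(key)  # 10: the whole set, regardless of which page is requested
page = r.zrevrangebyscore(key, '+Inf', '-Inf', start=0, num=3, withscores=True)
print(total, page)    # first page of 3 (member, timestamp) pairs, most recent first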


@@ -17,6 +17,7 @@ class Cloudflare():
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(get_config('generic', 'loglevel'))
         session = requests.Session()
+        self.available = True
         # Get IPv4
         try:
             r = session.get('https://www.cloudflare.com/ips-v4', timeout=2)
@@ -34,9 +35,9 @@
             self.logger.warning(f'Unable to get Cloudflare IPv6 list: {e}')
             self.available = False
 
-        self.v4_list = [ipaddress.ip_network(net) for net in ipv4_list.split('\n')]
-        self.v6_list = [ipaddress.ip_network(net) for net in ipv6_list.split('\n')]
-        self.available = True
+        if self.available:
+            self.v4_list = [ipaddress.ip_network(net) for net in ipv4_list.split('\n')]
+            self.v6_list = [ipaddress.ip_network(net) for net in ipv6_list.split('\n')]
 
     def ips_lookup(self, ips: set[str]) -> dict[str, bool]:
         '''Lookup a list of IPs. True means it is a known Cloudflare IP'''
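This reordering fixes an init bug: previously the list comprehensions ran even after a failed download (a NameError, since ipv4_list/ipv6_list were never assigned), and the trailing self.available = True clobbered the failure flag. A condensed sketch of the fixed pattern; class and variable names are illustrative, not Lookyloo's:

# Illustrative sketch of the guard introduced by the diff above.
import ipaddress
import requests

class CloudflareSketch:
    def __init__(self) -> None:
        self.available = True  # optimistic default, flipped on any fetch failure
        try:
            v4 = requests.get('https://www.cloudflare.com/ips-v4', timeout=2).text
            v6 = requests.get('https://www.cloudflare.com/ips-v6', timeout=2).text
        except requests.exceptions.RequestException:
            self.available = False
        if self.available:
            # strip() guards against a trailing newline producing an empty entry
            self.v4_list = [ipaddress.ip_network(net) for net in v4.strip().split('\n')]
            self.v6_list = [ipaddress.ip_network(net) for net in v6.strip().split('\n')]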


@@ -434,11 +434,11 @@ def get_identifier_investigator(identifier_type: str, identifier: str) -> list[t
     return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
 
 
-def get_capture_hash_investigator(hash_type: str, h: str) -> list[tuple[str, str, str, datetime]]:
-    cached_captures = lookyloo.sorted_capture_cache(
-        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hash_type(hash_type=hash_type, h=h)],
-        cached_captures_only=True)
-    return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
+def get_capture_hash_investigator(hash_type: str, h: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
+    total, entries = get_indexing(flask_login.current_user).get_captures_hash_type(hash_type=hash_type, h=h, offset=offset, limit=limit)
+    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
+    captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
+    return total, captures
 
 
 def get_favicon_investigator(favicon_sha512: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
@@ -1991,6 +1991,19 @@ def post_table(table_name: str, value: str) -> Response:
     start = request.form.get('start', type=int)
     length = request.form.get('length', type=int)
     captures: list[tuple[str, str, str, datetime, set[str]]] | list[tuple[str, str, str, datetime]]
+    if table_name == 'hashTypeDetailsTable':
+        hash_type, h = value.strip().split('|')
+        total, captures = get_capture_hash_investigator(hash_type, h, offset=start, limit=length)
+        prepared_captures = []
+        for capture_uuid, title, landing_page, capture_time in captures:
+            to_append = {
+                'capture_time': capture_time.isoformat(),
+                'capture_title': f"""<a href="{url_for('tree', tree_uuid=capture_uuid)}">{title}</a>""",
+                'landing_page': f"""<span class="d-inline-block text-break" style="max-width: 400px;">{landing_page}</span>"""
+            }
+            prepared_captures.append(to_append)
+        return jsonify({'draw': draw, 'recordsTotal': total, 'recordsFiltered': total, 'data': prepared_captures})
+
     if table_name == 'faviconDetailsTable':
         total, captures = get_favicon_investigator(value.strip(), offset=start, limit=length)
         prepared_captures = []
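With this branch in place, the endpoint speaks DataTables' server-side protocol: the widget POSTs draw/start/length form fields (start and length are read just above; draw is presumably read earlier in post_table) and gets back one page of rows plus the totals. A hypothetical exchange; the instance URL and hash value are made up, the path shape comes from the JavaScript below:

# Sketch of the request DataTables issues and the response shape post_table returns.
import requests

resp = requests.post(
    'https://lookyloo.example/tables/hashTypeDetailsTable/domhash|deadbeef',
    data={'draw': 1, 'start': 0, 'length': 10},
)
body = resp.json()
assert body['draw'] == 1                                  # request counter, echoed back
assert body['recordsTotal'] == body['recordsFiltered']    # no search filter on this table
rows = body['data']  # up to 10 dicts with capture_time / capture_title / landing_page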


@@ -33,7 +33,7 @@
   "loader.gif": "ZZKD5vLSKBWKeUpa2KI9qheUJ49iTI/UULmVU/AX28fBfH00K3lLc2v5pVJZ4qXG1BbB13LTXzRKKU35H2XfNg==",
   "lookyloo.jpeg": "i6wBj8CsIM5YAQLEMQfhs3CNOSKkErF8AMqqM6ZygSwCyQgv9CU8xt94veMZhM/ufBWoz7kAXmR+yywmxsTxug==",
   "redirect.png": "PAjzlPV97rEFvH55mG1ZC9wRl98be3yMeX/nENuFkJcds6/AXgSR2ig/QyPULgobSnNgiYieLVWY/oqsgeywrQ==",
-  "render_tables.js": "L8K+7SHzG6c4kddX4WgUTYIh9qeTPK3k16NWxMiHJt5KFtR2KiC9e9dpMPl+8m3LwNnhlzRwaIYYb/gxuuJbqw==",
+  "render_tables.js": "9+CqbiqwG3SeoIwa/nRPP9RiTZBZU0BNqmXAO5A847rd0gNTpZaB15yDqj0gGG7HZVq/ckv53wN3jfo+Kk1rTw==",
   "secure.svg": "H8ni7t0d60nCJDVGuZpuxC+RBy/ipAjWT627D12HlZGg6LUmjSwPTQTUekm3UJupEP7TUkhXyq6WHc5gy7QBjg==",
   "stats.css": "/kY943FwWBTne4IIyf7iBROSfbGd82TeBicEXqKkRwawMVRIvM/Pk5MRa7okUyGIxaDjFQGmV/U1vy+PhN6Jbw==",
   "stats_graph.js": "S/sMNQK1UMMLD0xQeEa7sq3ce8o6oPxwxGlyKVtaHOODjair86dbBDm7cu6pa/elMRDJT1j09jEFjWp+5GbhTw==",


@@ -15,8 +15,20 @@
         });
     }
     if (document.getElementById('hashTypeDetailsTable')) {
+        hash_value = document.getElementById('hashTypeDetailsTable').dataset.hashvalue;
         new DataTable('#hashTypeDetailsTable', {
+            processing: true,
+            serverSide: true,
             retrieve: true,
+            ajax: {
+                url: `/tables/hashTypeDetailsTable/${hash_value}`,
+                type: 'POST'
+            },
+            columns : [
+                { data: 'capture_time' },
+                { data: 'capture_title' },
+                { data: 'landing_page' }
+            ],
             order: [[ 0, "desc" ]],
             columnDefs: [{ width: '30%',
                            targets: 0,
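The columns option binds each cell to a key of the row objects that post_table serializes, which is why the template below can drop its <tbody> entirely. One representative row as the Flask side would emit it, with illustrative values:

# Shape of a single entry in the 'data' array consumed by the columns config above.
row = {
    'capture_time': '2024-11-28T12:54:32',
    'capture_title': '<a href="/tree/<capture-uuid>">Example capture</a>',
    'landing_page': '<span class="d-inline-block text-break" style="max-width: 400px;">https://example.com/landing</span>',
}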


@@ -1,9 +1,8 @@
 {% from "macros.html" import shorten_string %}
 <center>
   <h5>{{hash_type}}: {{h}}</h5>
-  <h6>Only the most recent captures are listed below, this will change soon.</h6>
 </center>
-<table id="hashTypeDetailsTable" class="table table-striped" style="width:100%">
+<table id="hashTypeDetailsTable" class="table table-striped" style="width:100%" data-hashvalue="{{hash_type}}|{{h}}">
 <thead>
   <tr>
     <th>Capture Time</th>
@@ -11,23 +10,4 @@
     <th>Landing page</th>
   </tr>
 </thead>
-<tbody>
-  {% for capture_uuid, title, landing_page, capture_time in captures %}
-  <tr>
-    <td>
-      {{capture_time}}
-    </td>
-    <td>
-      <a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">
-        {{ title }}
-      </a>
-    </td>
-    <td>
-      <span class="d-inline-block text-break" style="max-width: 400px;">
-        {{ landing_page }}
-      </span>
-    </td>
-  </tr>
-  {% endfor %}
-</tbody>
 </table>