mirror of https://github.com/CIRCL/lookyloo
new: Get any supported hash algorithm over the API
parent
c9307b5159
commit
8cceb948f3
|
@ -724,16 +724,14 @@ class Lookyloo():
|
|||
to_return[event_id].update(values)
|
||||
return to_return
|
||||
|
||||
def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List['URLNode']]]:
    """Build (on demand) hashes for all the resources of the tree, using the algorithm provided by the user.

    If you just want the hashes in SHA512, use the get_hashes method: it gives you a list of hashes and
    they're built with the tree. This method computes the hashes when you query it, so it is slower.

    :param tree_uuid: UUID of the capture, passed to get_crawled_tree (positional only).
    :param algorithm: Name of the hash algorithm, handed as-is to build_all_hashes on the root HAR tree.
    :param urls_only: If True, map every hash to the set of names of the nodes it was found on,
                      instead of the nodes themselves.
    :return: A dictionary keyed by hash. The values are sets of node names when urls_only is True,
             otherwise whatever build_all_hashes associates with the hash (annotated as lists of URLNode).
    """
    ct = self.get_crawled_tree(tree_uuid)
    hashes = ct.root_hartree.build_all_hashes(algorithm)
    if urls_only:
        # A set: the same resource can be seen on several nodes sharing one name, report it once.
        return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
    return hashes
|
||||
|
||||
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
from typing import Any, Dict
|
||||
|
||||
|
@ -102,11 +103,28 @@ class CaptureURLs(Resource):
|
|||
@api.doc(description='Get all the hashes of all the resources of a capture',
|
||||
params={'capture_uuid': 'The UUID of the capture'})
|
||||
class CaptureHashes(Resource):
    # Endpoint returning the hashes of all the resources of a capture.
    # (Kept as comments rather than docstrings so the generated API documentation is left unchanged.)

    # Note: shake algos require a length for the digest, discarding them.
    # hashlib.algorithms_available is a set: sort it so the documented list has a stable order.
    supported_hash_algos = sorted(algo for algo in hashlib.algorithms_available if not algo.startswith('shake'))

    # NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
    #       so we return the SHA512 hashes by default

    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')
    def get(self, capture_uuid: str):
        # Query parameters:
        #   algorithm:   hash algorithm name, case-insensitive, defaults to sha512.
        #   hashes_only: anything but '0' (including absent) returns only the list of hashes;
        #                '0' additionally returns the URLs for each hash under 'hashes_with_urls'.
        # Returns the response dictionary, or an error dictionary with a 400 status code.
        cache = lookyloo.capture_cache(capture_uuid)
        if not cache:
            return {'error': 'UUID missing in cache, try again later.'}, 400

        algorithm = (request.args.get('algorithm') or 'sha512').lower()
        # Reject anything outside of the advertised list (that includes the shake algorithms) before
        # trying to compute hashes with it.
        # NOTE(review): presumably the hashing itself relies on hashlib and would raise on an unknown name - confirm.
        if algorithm not in {algo.lower() for algo in self.supported_hash_algos}:
            return {'error': f'Unsupported hash algorithm: {algorithm}. Supported options: {", ".join(self.supported_hash_algos)}'}, 400

        # Query string values are strings, so only the literal '0' disables hashes_only.
        hashes_only = request.args.get('hashes_only') != '0'

        to_return: Dict[str, Any]
        if algorithm == 'sha512' and hashes_only:
            # Fast path: the SHA512 hashes are pre-computed in the tree.
            to_return = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
        else:
            hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
            to_return = {'response': {'hashes': list(hashes.keys())}}
            if not hashes_only:
                # The URLs come back as sets, turn them into lists so the response can be serialized.
                to_return['response']['hashes_with_urls'] = {h: list(urls) for h, urls in hashes.items()}
        return to_return
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue