new: Get any supported hash algorithm over the API

pull/303/head
Raphaël Vinot 2021-12-02 17:55:02 +01:00
parent c9307b5159
commit 8cceb948f3
2 changed files with 21 additions and 5 deletions

View File

@@ -724,16 +724,14 @@ class Lookyloo():
to_return[event_id].update(values) to_return[event_id].update(values)
return to_return return to_return
def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, hashes_only: bool=False, urls_only: bool=False): def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List[URLNode]]]:
"""Build (on demand) hashes for all the resources of the tree, using the algorithm provided by the user. """Build (on demand) hashes for all the resources of the tree, using the algorithm provided by the user.
If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes and they're built If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes and they're built
with the tree. This method is computing the hashes when you query it, so it is slower.""" with the tree. This method is computing the hashes when you query it, so it is slower."""
ct = self.get_crawled_tree(tree_uuid) ct = self.get_crawled_tree(tree_uuid)
hashes = ct.root_hartree.build_all_hashes(algorithm) hashes = ct.root_hartree.build_all_hashes(algorithm)
if hashes_only:
return list(hashes.keys())
if urls_only: if urls_only:
return {h: [node.name for node in nodes] for h, nodes in hashes.items()} return {h: set(node.name for node in nodes) for h, nodes in hashes.items()}
return hashes return hashes
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]: def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:

View File

@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import base64 import base64
import hashlib
import json import json
from typing import Any, Dict from typing import Any, Dict
@@ -102,11 +103,28 @@ class CaptureURLs(Resource):
@api.doc(description='Get all the hashes of all the resources of a capture', @api.doc(description='Get all the hashes of all the resources of a capture',
params={'capture_uuid': 'The UUID of the capture'}) params={'capture_uuid': 'The UUID of the capture'})
class CaptureHashes(Resource): class CaptureHashes(Resource):
# Note: shake algos require a length for the digest, discarding them.
supported_hash_algos = [algo for algo in hashlib.algorithms_available if not algo.startswith('shake')]
# NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
# so we return the SHA512 hashes by default
@api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
@api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')
def get(self, capture_uuid: str): def get(self, capture_uuid: str):
cache = lookyloo.capture_cache(capture_uuid) cache = lookyloo.capture_cache(capture_uuid)
if not cache: if not cache:
return {'error': 'UUID missing in cache, try again later.'}, 400 return {'error': 'UUID missing in cache, try again later.'}, 400
to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
algorithm = request.args['algorithm'].lower() if request.args.get('algorithm') else 'sha512'
hashes_only = False if 'hashes_only' in request.args and request.args['hashes_only'] in [0, '0'] else True
if algorithm == 'sha512' and hashes_only:
to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
else:
hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
to_return = {'response': {'hashes': list(hashes.keys())}}
if not hashes_only:
to_return['response']['hashes_with_urls'] = {h: list(urls) for h, urls in hashes.items()}
return to_return return to_return