new: Get any supported hash algorithm over the API

pull/303/head
Raphaël Vinot 2021-12-02 17:55:02 +01:00
parent c9307b5159
commit 8cceb948f3
2 changed files with 21 additions and 5 deletions

View File

@ -724,16 +724,14 @@ class Lookyloo():
to_return[event_id].update(values)
return to_return
def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List[URLNode]]]:
    """Build (on demand) the hashes of all the resources of the tree, using the algorithm provided by the user.

    If you just want the hashes in SHA512, use the get_hashes method: it gives you a list of hashes
    that are built along with the tree. This method computes the hashes when you query it, so it is slower.

    :param tree_uuid: UUID of the capture, used to load the crawled tree.
    :param algorithm: Name of the hash algorithm, passed as-is to the tree's build_all_hashes.
    :param urls_only: If True, each hash maps to the set of names of the nodes having that hash;
                      otherwise the mapping returned by build_all_hashes is returned unchanged.
    """
    ct = self.get_crawled_tree(tree_uuid)
    hashes = ct.root_hartree.build_all_hashes(algorithm)
    if urls_only:
        # Several nodes can share the same name: a set de-duplicates them per hash.
        return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
    return hashes
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:

View File

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import base64
import hashlib
import json
from typing import Any, Dict
@ -102,11 +103,28 @@ class CaptureURLs(Resource):
@api.doc(description='Get all the hashes of all the resources of a capture',
         params={'capture_uuid': 'The UUID of the capture'})
class CaptureHashes(Resource):
    # Note: shake algos require a length for the digest, discarding them.
    # hashlib.algorithms_available is a set, so the names are sorted to keep the
    # generated API description stable from one process to the next.
    supported_hash_algos = sorted(algo for algo in hashlib.algorithms_available if not algo.startswith('shake'))

    # NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
    #       so we return the SHA512 hashes by default
    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')
    def get(self, capture_uuid: str):
        # Returns {'response': {'hashes': [...]}}, plus 'hashes_with_urls' when
        # hashes_only=0 is requested; returns an error payload with HTTP 400 when the
        # capture is not cached or the requested algorithm is not supported.
        cache = lookyloo.capture_cache(capture_uuid)
        if not cache:
            return {'error': 'UUID missing in cache, try again later.'}, 400

        # A missing or empty 'algorithm' argument falls back to sha512.
        algorithm = (request.args.get('algorithm') or 'sha512').lower()
        if algorithm not in self.supported_hash_algos:
            # Reject unknown names (and the shake algos excluded above) up front
            # instead of handing them to the on-demand hash computation.
            return {'error': f'Unsupported hash algorithm: {algorithm}.'}, 400

        # Query-string values are strings: only an explicit '0' disables hashes_only.
        hashes_only = request.args.get('hashes_only') != '0'

        to_return: Dict[str, Any]
        if algorithm == 'sha512' and hashes_only:
            # Fast path: the SHA512 hashes are already available from the tree.
            to_return = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
        else:
            hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
            to_return = {'response': {'hashes': list(hashes)}}
            if not hashes_only:
                # The per-hash URL collections are converted to lists before being returned.
                to_return['response']['hashes_with_urls'] = {h: list(urls) for h, urls in hashes.items()}
        return to_return