From 8cceb948f38a5533e35df4003a3cd5d31a3f2b49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Thu, 2 Dec 2021 17:55:02 +0100
Subject: [PATCH] new: Get any supported hash algorithm over the API

---
 lookyloo/lookyloo.py      |  6 ++----
 website/web/genericapi.py | 26 +++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index bf6e4bb..9a9fc46 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -724,16 +724,14 @@ class Lookyloo():
                 to_return[event_id].update(values)
         return to_return
 
-    def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, hashes_only: bool=False, urls_only: bool=False):
+    def get_hashes_with_context(self, tree_uuid: str, /, algorithm: str, *, urls_only: bool=False) -> Union[Dict[str, Set[str]], Dict[str, List[URLNode]]]:
         """Build (on demand) hashes for all the ressources of the tree, using the alorighm provided by the user.
         If you just want the hashes in SHA512, use the get_hashes method, it gives you a list of hashes an they're build with the tree.
         This method is computing the hashes when you query it, so it is slower."""
         ct = self.get_crawled_tree(tree_uuid)
         hashes = ct.root_hartree.build_all_hashes(algorithm)
-        if hashes_only:
-            return list(hashes.keys())
         if urls_only:
-            return {h: [node.name for node in nodes] for h, nodes in hashes.items()}
+            return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
         return hashes
 
     def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:
diff --git a/website/web/genericapi.py b/website/web/genericapi.py
index de6ffb2..d93c91e 100644
--- a/website/web/genericapi.py
+++ b/website/web/genericapi.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import base64
+import hashlib
 import json
 
 from typing import Any, Dict
@@ -102,11 +103,34 @@ class CaptureURLs(Resource):
 @api.doc(description='Get all the hashes of all the resources of a capture',
          params={'capture_uuid': 'The UUID of the capture'})
 class CaptureHashes(Resource):
+    # Note: shake algos require a length for the digest, discarding them.
+    # Names are lowercased because the requested algorithm is lowercased before the lookup,
+    # and sorted because algorithms_available is a set (keeps the generated documentation stable).
+    supported_hash_algos = sorted({algo.lower() for algo in hashlib.algorithms_available if not algo.lower().startswith('shake')})
+
+    # NOTE: the SHA512 hashes are pre-computed in the tree, anything else must be computed on the spot
+    # so we return the SHA512 hashes by default
+
+    @api.param('algorithm', default='sha512', description=f'Algorithm of the hashes (default: sha512). Supported options: {", ".join(supported_hash_algos)}')
+    @api.param('hashes_only', default=1, description='If 1 (default), only returns a list hashes instead of a dictionary of hashes with their respective URLs..')
     def get(self, capture_uuid: str):
         cache = lookyloo.capture_cache(capture_uuid)
         if not cache:
             return {'error': 'UUID missing in cache, try again later.'}, 400
-        to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
+
+        algorithm = (request.args.get('algorithm') or 'sha512').lower()
+        if algorithm not in self.supported_hash_algos:
+            # Unknown or discarded (shake) algorithm: answer with a client error rather than passing it on to the hashing code.
+            return {'error': f'Unsupported hash algorithm: {algorithm}'}, 400
+        # Only an explicit 0 switches to the dictionary output; a missing parameter keeps the default (hashes only).
+        hashes_only = request.args.get('hashes_only') not in (0, '0')
+        if algorithm == 'sha512' and hashes_only:
+            to_return: Dict[str, Any] = {'response': {'hashes': list(lookyloo.get_hashes(capture_uuid))}}
+        else:
+            hashes = lookyloo.get_hashes_with_context(capture_uuid, algorithm=algorithm, urls_only=True)
+            to_return = {'response': {'hashes': list(hashes)}}
+            if not hashes_only:
+                to_return['response']['hashes_with_urls'] = {h: list(urls) for h, urls in hashes.items()}
         return to_return
 
 