From 599ae9f9f3a336616ea64154aa9fd93056bd0cfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 31 Jan 2023 11:22:43 +0100 Subject: [PATCH] new: Compare ressources URLs --- lookyloo/comparator.py | 83 ++++++++++++++++++++++----------------- website/web/genericapi.py | 11 +++--- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/lookyloo/comparator.py b/lookyloo/comparator.py index 67174f3d..881e96d3 100644 --- a/lookyloo/comparator.py +++ b/lookyloo/comparator.py @@ -29,89 +29,100 @@ class Comparator(): def redis(self): return Redis(connection_pool=self.redis_pool) - def compare_nodes(self, one, two, /) -> Dict[str, Any]: + def compare_nodes(self, left, right, /) -> Dict[str, Any]: to_return = {} # URL - if one.name != two.name: + if left.name != right.name: to_return['url'] = {'message': 'The nodes have different URLs.', - 'details': [one.name, two.name]} + 'details': [left.name, right.name]} # Hostname - if one.hostname != two.hostname: + if left.hostname != right.hostname: to_return['hostname'] = {'message': 'The nodes have different hostnames.', - 'details': [one.hostname, two.hostname]} + 'details': [left.hostname, right.hostname]} else: to_return['hostname'] = {'message': 'The nodes have the same hostname.', - 'details': one.hostname} + 'details': left.hostname} else: to_return['url'] = {'message': 'The nodes have the same URL.', - 'details': one.name} + 'details': left.name} # IP in HAR - if one.ip_address != two.ip_address: + if left.ip_address != right.ip_address: to_return['ip'] = {'message': 'The nodes load content from different IPs.', - 'details': [str(one.ip_address), str(two.ip_address)]} + 'details': [str(left.ip_address), str(right.ip_address)]} else: to_return['ip'] = {'message': 'The nodes load content from the same IP.', - 'details': str(one.ip_address)} + 'details': str(left.ip_address)} # IPs in hostnode + ASNs return to_return - def compare_captures(self, capture_one, capture_two, /) -> Dict[str, Any]: - if capture_one not in self._captures_index: - raise MissingUUID(f'{capture_one} does not exists.') - if capture_two not in self._captures_index: - raise MissingUUID(f'{capture_two} does not exists.') + def compare_captures(self, capture_left, capture_right, /) -> Dict[str, Any]: + if capture_left not in self._captures_index: + raise MissingUUID(f'{capture_left} does not exists.') + if capture_right not in self._captures_index: + raise MissingUUID(f'{capture_right} does not exists.') to_return: Dict[str, Dict[str, Union[str, List[Union[str, Dict[str, Any]]], Dict[str, Union[int, str, List[Union[int, str, Dict[str, Any]]]]]]]] = {} - one = self._captures_index[capture_one] - two = self._captures_index[capture_two] + left = self._captures_index[capture_left] + right = self._captures_index[capture_right] # Compare initial URL (first entry in HAR) - if one.tree.root_url != two.tree.root_url: + if left.tree.root_url != right.tree.root_url: to_return['root_url'] = {'message': 'The captures are for different URLs.', - 'details': [one.tree.root_url, two.tree.root_url]} + 'details': [left.tree.root_url, right.tree.root_url]} else: to_return['root_url'] = {'message': 'The captures are the same URL.', - 'details': one.tree.root_url} + 'details': left.tree.root_url} # Compare landing page (URL in browser) - if one.tree.root_hartree.har.final_redirect != two.tree.root_hartree.har.final_redirect: + if left.tree.root_hartree.har.final_redirect != right.tree.root_hartree.har.final_redirect: to_return['final_url'] = {'message': 'The landing page is different.', - 'details': [one.tree.root_hartree.har.final_redirect, two.tree.root_hartree.har.final_redirect]} + 'details': [left.tree.root_hartree.har.final_redirect, right.tree.root_hartree.har.final_redirect]} # => if different, check if the hostname is the same - if one.tree.root_hartree.rendered_node.hostname != two.tree.root_hartree.rendered_node.hostname: + if left.tree.root_hartree.rendered_node.hostname != right.tree.root_hartree.rendered_node.hostname: to_return['final_hostname'] = {'message': 'The hostname of the rendered page is different.', - 'details': [one.tree.root_hartree.rendered_node.hostname, two.tree.root_hartree.rendered_node.hostname]} + 'details': [left.tree.root_hartree.rendered_node.hostname, right.tree.root_hartree.rendered_node.hostname]} else: to_return['final_hostname'] = {'message': 'The hostname of the rendered page is the same.', - 'details': one.tree.root_hartree.rendered_node.hostname} + 'details': left.tree.root_hartree.rendered_node.hostname} else: to_return['final_url'] = {'message': 'The landing page is the same.', - 'details': one.tree.root_hartree.har.final_redirect} + 'details': left.tree.root_hartree.har.final_redirect} - if one.tree.root_hartree.rendered_node.response['status'] != two.tree.root_hartree.rendered_node.response['status']: + if left.tree.root_hartree.rendered_node.response['status'] != right.tree.root_hartree.rendered_node.response['status']: to_return['final_status_code'] = {'message': 'The status code of the rendered page is different.', - 'details': [one.tree.root_hartree.rendered_node.response['status'], two.tree.root_hartree.rendered_node.response['status']]} + 'details': [left.tree.root_hartree.rendered_node.response['status'], right.tree.root_hartree.rendered_node.response['status']]} else: to_return['final_status_code'] = {'message': 'The status code of the rendered page is the same.', - 'details': one.tree.root_hartree.rendered_node.response['status']} + 'details': left.tree.root_hartree.rendered_node.response['status']} to_return['redirects'] = {'length': {}, 'nodes': []} - if len(one.tree.redirects) != len(two.tree.redirects): + if len(left.tree.redirects) != len(right.tree.redirects): to_return['redirects']['length'] = {'message': 'The captures have a different amount of redirects', - 'details': [len(one.tree.redirects), len(two.tree.redirects)]} + 'details': [len(left.tree.redirects), len(right.tree.redirects)]} else: to_return['redirects']['length'] = {'message': 'The captures have the same number of redirects', - 'details': len(one.tree.redirects)} + 'details': len(left.tree.redirects)} # Compare chain of redirects - redirect_nodes_one = [a for a in reversed(one.tree.root_hartree.rendered_node.get_ancestors())] + [one.tree.root_hartree.rendered_node] - redirect_nodes_two = [a for a in reversed(two.tree.root_hartree.rendered_node.get_ancestors())] + [two.tree.root_hartree.rendered_node] - for redirect_one, redirect_two in zip(redirect_nodes_one, redirect_nodes_two): + redirect_nodes_left = [a for a in reversed(left.tree.root_hartree.rendered_node.get_ancestors())] + [left.tree.root_hartree.rendered_node] + redirect_nodes_right = [a for a in reversed(right.tree.root_hartree.rendered_node.get_ancestors())] + [right.tree.root_hartree.rendered_node] + for redirect_left, redirect_right in zip(redirect_nodes_left, redirect_nodes_right): if isinstance(to_return['redirects']['nodes'], list): - to_return['redirects']['nodes'].append(self.compare_nodes(redirect_one, redirect_two)) + to_return['redirects']['nodes'].append(self.compare_nodes(redirect_left, redirect_right)) + + # Compare all ressources URLs + to_return['ressources'] = {} + ressources_left = {a.name for a in left.tree.root_hartree.rendered_node.traverse()} + ressources_right = {a.name for a in right.tree.root_hartree.rendered_node.traverse()} + if present_in_both := ressources_left & ressources_right: + to_return['ressources']['both'] = sorted(present_in_both) + if present_left := ressources_left - ressources_right: + to_return['ressources']['left'] = sorted(present_left) + if present_right := ressources_right - ressources_left: + to_return['ressources']['right'] = sorted(present_right) # IP/ASN checks - Note: there is the IP in the HAR, and the ones resolved manually - if the IP is different, but part of the list, it's cool # For each node up to the landing page diff --git a/website/web/genericapi.py b/website/web/genericapi.py index 18fd9bd6..3441f22c 100644 --- a/website/web/genericapi.py +++ b/website/web/genericapi.py @@ -21,7 +21,7 @@ api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/') lookyloo: Lookyloo = Lookyloo() -comaprator: Comparator = Comparator() +comparator: Comparator = Comparator() def api_auth_check(method): @@ -444,19 +444,18 @@ class CaptureExport(Resource): # Compare captures (WiP) compare_captures_fields = api.model('CompareCapturesFields', { - 'capture_one': fields.String(description="The first capture to compare.", required=True), - 'capture_two': fields.String(description="The second capture to compare.", required=True), + 'capture_left': fields.String(description="Left capture to compare.", required=True), + 'capture_right': fields.String(description="Right capture to compare.", required=True), }) @api.route('/json/compare_captures') -@api.doc(description='Compare two captures (WiP)') +@api.doc(description='Compare two captures') class CompareCaptures(Resource): @api.doc(body=compare_captures_fields) def post(self): parameters: Dict = request.get_json(force=True) # type: ignore - result = comaprator.compare_captures(parameters.get('capture_one'), parameters.get('capture_two')) - print(result) + result = comparator.compare_captures(parameters.get('capture_left'), parameters.get('capture_right')) return result