new: Compare resources URLs

pull/593/head
Raphaël Vinot 2023-01-31 11:22:43 +01:00
parent a1adc810ba
commit 599ae9f9f3
2 changed files with 52 additions and 42 deletions

View File

@ -29,89 +29,100 @@ class Comparator():
def redis(self): def redis(self):
return Redis(connection_pool=self.redis_pool) return Redis(connection_pool=self.redis_pool)
def compare_nodes(self, one, two, /) -> Dict[str, Any]: def compare_nodes(self, left, right, /) -> Dict[str, Any]:
to_return = {} to_return = {}
# URL # URL
if one.name != two.name: if left.name != right.name:
to_return['url'] = {'message': 'The nodes have different URLs.', to_return['url'] = {'message': 'The nodes have different URLs.',
'details': [one.name, two.name]} 'details': [left.name, right.name]}
# Hostname # Hostname
if one.hostname != two.hostname: if left.hostname != right.hostname:
to_return['hostname'] = {'message': 'The nodes have different hostnames.', to_return['hostname'] = {'message': 'The nodes have different hostnames.',
'details': [one.hostname, two.hostname]} 'details': [left.hostname, right.hostname]}
else: else:
to_return['hostname'] = {'message': 'The nodes have the same hostname.', to_return['hostname'] = {'message': 'The nodes have the same hostname.',
'details': one.hostname} 'details': left.hostname}
else: else:
to_return['url'] = {'message': 'The nodes have the same URL.', to_return['url'] = {'message': 'The nodes have the same URL.',
'details': one.name} 'details': left.name}
# IP in HAR # IP in HAR
if one.ip_address != two.ip_address: if left.ip_address != right.ip_address:
to_return['ip'] = {'message': 'The nodes load content from different IPs.', to_return['ip'] = {'message': 'The nodes load content from different IPs.',
'details': [str(one.ip_address), str(two.ip_address)]} 'details': [str(left.ip_address), str(right.ip_address)]}
else: else:
to_return['ip'] = {'message': 'The nodes load content from the same IP.', to_return['ip'] = {'message': 'The nodes load content from the same IP.',
'details': str(one.ip_address)} 'details': str(left.ip_address)}
# IPs in hostnode + ASNs # IPs in hostnode + ASNs
return to_return return to_return
def compare_captures(self, capture_one, capture_two, /) -> Dict[str, Any]: def compare_captures(self, capture_left, capture_right, /) -> Dict[str, Any]:
if capture_one not in self._captures_index: if capture_left not in self._captures_index:
raise MissingUUID(f'{capture_one} does not exists.') raise MissingUUID(f'{capture_left} does not exists.')
if capture_two not in self._captures_index: if capture_right not in self._captures_index:
raise MissingUUID(f'{capture_two} does not exists.') raise MissingUUID(f'{capture_right} does not exists.')
to_return: Dict[str, Dict[str, Union[str, to_return: Dict[str, Dict[str, Union[str,
List[Union[str, Dict[str, Any]]], List[Union[str, Dict[str, Any]]],
Dict[str, Union[int, str, Dict[str, Union[int, str,
List[Union[int, str, Dict[str, Any]]]]]]]] = {} List[Union[int, str, Dict[str, Any]]]]]]]] = {}
one = self._captures_index[capture_one] left = self._captures_index[capture_left]
two = self._captures_index[capture_two] right = self._captures_index[capture_right]
# Compare initial URL (first entry in HAR) # Compare initial URL (first entry in HAR)
if one.tree.root_url != two.tree.root_url: if left.tree.root_url != right.tree.root_url:
to_return['root_url'] = {'message': 'The captures are for different URLs.', to_return['root_url'] = {'message': 'The captures are for different URLs.',
'details': [one.tree.root_url, two.tree.root_url]} 'details': [left.tree.root_url, right.tree.root_url]}
else: else:
to_return['root_url'] = {'message': 'The captures are the same URL.', to_return['root_url'] = {'message': 'The captures are the same URL.',
'details': one.tree.root_url} 'details': left.tree.root_url}
# Compare landing page (URL in browser) # Compare landing page (URL in browser)
if one.tree.root_hartree.har.final_redirect != two.tree.root_hartree.har.final_redirect: if left.tree.root_hartree.har.final_redirect != right.tree.root_hartree.har.final_redirect:
to_return['final_url'] = {'message': 'The landing page is different.', to_return['final_url'] = {'message': 'The landing page is different.',
'details': [one.tree.root_hartree.har.final_redirect, two.tree.root_hartree.har.final_redirect]} 'details': [left.tree.root_hartree.har.final_redirect, right.tree.root_hartree.har.final_redirect]}
# => if different, check if the hostname is the same # => if different, check if the hostname is the same
if one.tree.root_hartree.rendered_node.hostname != two.tree.root_hartree.rendered_node.hostname: if left.tree.root_hartree.rendered_node.hostname != right.tree.root_hartree.rendered_node.hostname:
to_return['final_hostname'] = {'message': 'The hostname of the rendered page is different.', to_return['final_hostname'] = {'message': 'The hostname of the rendered page is different.',
'details': [one.tree.root_hartree.rendered_node.hostname, two.tree.root_hartree.rendered_node.hostname]} 'details': [left.tree.root_hartree.rendered_node.hostname, right.tree.root_hartree.rendered_node.hostname]}
else: else:
to_return['final_hostname'] = {'message': 'The hostname of the rendered page is the same.', to_return['final_hostname'] = {'message': 'The hostname of the rendered page is the same.',
'details': one.tree.root_hartree.rendered_node.hostname} 'details': left.tree.root_hartree.rendered_node.hostname}
else: else:
to_return['final_url'] = {'message': 'The landing page is the same.', to_return['final_url'] = {'message': 'The landing page is the same.',
'details': one.tree.root_hartree.har.final_redirect} 'details': left.tree.root_hartree.har.final_redirect}
if one.tree.root_hartree.rendered_node.response['status'] != two.tree.root_hartree.rendered_node.response['status']: if left.tree.root_hartree.rendered_node.response['status'] != right.tree.root_hartree.rendered_node.response['status']:
to_return['final_status_code'] = {'message': 'The status code of the rendered page is different.', to_return['final_status_code'] = {'message': 'The status code of the rendered page is different.',
'details': [one.tree.root_hartree.rendered_node.response['status'], two.tree.root_hartree.rendered_node.response['status']]} 'details': [left.tree.root_hartree.rendered_node.response['status'], right.tree.root_hartree.rendered_node.response['status']]}
else: else:
to_return['final_status_code'] = {'message': 'The status code of the rendered page is the same.', to_return['final_status_code'] = {'message': 'The status code of the rendered page is the same.',
'details': one.tree.root_hartree.rendered_node.response['status']} 'details': left.tree.root_hartree.rendered_node.response['status']}
to_return['redirects'] = {'length': {}, 'nodes': []} to_return['redirects'] = {'length': {}, 'nodes': []}
if len(one.tree.redirects) != len(two.tree.redirects): if len(left.tree.redirects) != len(right.tree.redirects):
to_return['redirects']['length'] = {'message': 'The captures have a different amount of redirects', to_return['redirects']['length'] = {'message': 'The captures have a different amount of redirects',
'details': [len(one.tree.redirects), len(two.tree.redirects)]} 'details': [len(left.tree.redirects), len(right.tree.redirects)]}
else: else:
to_return['redirects']['length'] = {'message': 'The captures have the same number of redirects', to_return['redirects']['length'] = {'message': 'The captures have the same number of redirects',
'details': len(one.tree.redirects)} 'details': len(left.tree.redirects)}
# Compare chain of redirects # Compare chain of redirects
redirect_nodes_one = [a for a in reversed(one.tree.root_hartree.rendered_node.get_ancestors())] + [one.tree.root_hartree.rendered_node] redirect_nodes_left = [a for a in reversed(left.tree.root_hartree.rendered_node.get_ancestors())] + [left.tree.root_hartree.rendered_node]
redirect_nodes_two = [a for a in reversed(two.tree.root_hartree.rendered_node.get_ancestors())] + [two.tree.root_hartree.rendered_node] redirect_nodes_right = [a for a in reversed(right.tree.root_hartree.rendered_node.get_ancestors())] + [right.tree.root_hartree.rendered_node]
for redirect_one, redirect_two in zip(redirect_nodes_one, redirect_nodes_two): for redirect_left, redirect_right in zip(redirect_nodes_left, redirect_nodes_right):
if isinstance(to_return['redirects']['nodes'], list): if isinstance(to_return['redirects']['nodes'], list):
to_return['redirects']['nodes'].append(self.compare_nodes(redirect_one, redirect_two)) to_return['redirects']['nodes'].append(self.compare_nodes(redirect_left, redirect_right))
# Compare all ressources URLs
to_return['ressources'] = {}
ressources_left = {a.name for a in left.tree.root_hartree.rendered_node.traverse()}
ressources_right = {a.name for a in right.tree.root_hartree.rendered_node.traverse()}
if present_in_both := ressources_left & ressources_right:
to_return['ressources']['both'] = sorted(present_in_both)
if present_left := ressources_left - ressources_right:
to_return['ressources']['left'] = sorted(present_left)
if present_right := ressources_right - ressources_left:
to_return['ressources']['right'] = sorted(present_right)
# IP/ASN checks - Note: there is the IP in the HAR, and the ones resolved manually - if the IP is different, but part of the list, it's cool # IP/ASN checks - Note: there is the IP in the HAR, and the ones resolved manually - if the IP is different, but part of the list, it's cool
# For each node up to the landing page # For each node up to the landing page

View File

@ -21,7 +21,7 @@ api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
lookyloo: Lookyloo = Lookyloo() lookyloo: Lookyloo = Lookyloo()
comaprator: Comparator = Comparator() comparator: Comparator = Comparator()
def api_auth_check(method): def api_auth_check(method):
@ -444,19 +444,18 @@ class CaptureExport(Resource):
# Compare captures (WiP) # Compare captures (WiP)
compare_captures_fields = api.model('CompareCapturesFields', { compare_captures_fields = api.model('CompareCapturesFields', {
'capture_one': fields.String(description="The first capture to compare.", required=True), 'capture_left': fields.String(description="Left capture to compare.", required=True),
'capture_two': fields.String(description="The second capture to compare.", required=True), 'capture_right': fields.String(description="Right capture to compare.", required=True),
}) })
@api.route('/json/compare_captures') @api.route('/json/compare_captures')
@api.doc(description='Compare two captures (WiP)') @api.doc(description='Compare two captures')
class CompareCaptures(Resource): class CompareCaptures(Resource):
@api.doc(body=compare_captures_fields) @api.doc(body=compare_captures_fields)
def post(self): def post(self):
parameters: Dict = request.get_json(force=True) # type: ignore parameters: Dict = request.get_json(force=True) # type: ignore
result = comaprator.compare_captures(parameters.get('capture_one'), parameters.get('capture_two')) result = comparator.compare_captures(parameters.get('capture_left'), parameters.get('capture_right'))
print(result)
return result return result