new: Endpoint to get comparables from a capture

pull/671/head
Raphaël Vinot 2023-04-24 16:25:29 +02:00
parent b33ef171a4
commit c62032fee2
2 changed files with 39 additions and 10 deletions

View File

@ -85,6 +85,7 @@ class Comparator():
'redirects': {'length': len(capture.tree.redirects)}} 'redirects': {'length': len(capture.tree.redirects)}}
to_return['redirects']['nodes'] = [self.get_comparables_node(a) for a in list(reversed(capture.tree.root_hartree.rendered_node.get_ancestors())) + [capture.tree.root_hartree.rendered_node]] to_return['redirects']['nodes'] = [self.get_comparables_node(a) for a in list(reversed(capture.tree.root_hartree.rendered_node.get_ancestors())) + [capture.tree.root_hartree.rendered_node]]
to_return['ressources'] = {(a.name, a.hostname) for a in capture.tree.root_hartree.rendered_node.traverse()}
return to_return return to_return
def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Dict[str, Any]: def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Dict[str, Any]:
@ -143,9 +144,6 @@ class Comparator():
to_return['redirects']['nodes'].append(self._compare_nodes(redirect_left, redirect_right)) to_return['redirects']['nodes'].append(self._compare_nodes(redirect_left, redirect_right))
# Compare all ressources URLs # Compare all ressources URLs
left_capture = self._captures_index[capture_left]
right_capture = self._captures_index[capture_right]
to_return['ressources'] = {} to_return['ressources'] = {}
_settings: Optional[CompareSettings] _settings: Optional[CompareSettings]
if settings: if settings:
@ -158,14 +156,14 @@ class Comparator():
} }
else: else:
_settings = None _settings = None
ressources_left = {a.name for a in left_capture.tree.root_hartree.rendered_node.traverse() ressources_left = {url for url, hostname in left['ressources']
if not _settings if not _settings
or not a.hostname.endswith(_settings['ressources_ignore_domains']) or not hostname.endswith(_settings['ressources_ignore_domains'])
or not any(fnmatch.fnmatch(a.name, regex) for regex in _settings['ressources_ignore_regexes'])} or not any(fnmatch.fnmatch(url, regex) for regex in _settings['ressources_ignore_regexes'])}
ressources_right = {a.name for a in right_capture.tree.root_hartree.rendered_node.traverse() if not settings ressources_right = {url for url, hostname in right['ressources']
if not _settings if not _settings
or not a.hostname.endswith(_settings['ressources_ignore_domains']) or not hostname.endswith(_settings['ressources_ignore_domains'])
or not any(fnmatch.fnmatch(a.name, regex) for regex in _settings['ressources_ignore_regexes'])} or not any(fnmatch.fnmatch(url, regex) for regex in _settings['ressources_ignore_regexes'])}
if present_in_both := ressources_left & ressources_right: if present_in_both := ressources_left & ressources_right:
to_return['ressources']['both'] = sorted(present_in_both) to_return['ressources']['both'] = sorted(present_in_both)
if present_left := ressources_left - ressources_right: if present_left := ressources_left - ressources_right:

View File

@ -12,9 +12,9 @@ from werkzeug.security import check_password_hash
from lacuscore import CaptureStatus as CaptureStatusCore from lacuscore import CaptureStatus as CaptureStatusCore
from pylacus import CaptureStatus as CaptureStatusPy from pylacus import CaptureStatus as CaptureStatusPy
from lookyloo.lookyloo import Lookyloo
from lookyloo.comparator import Comparator from lookyloo.comparator import Comparator
from lookyloo.exceptions import MissingUUID from lookyloo.exceptions import MissingUUID
from lookyloo.lookyloo import Lookyloo
from .helpers import build_users_table, load_user_from_request, src_request_ip from .helpers import build_users_table, load_user_from_request, src_request_ip
@ -479,6 +479,37 @@ class CompareCaptures(Resource):
return result return result
# Response models for the /json/comparables/<capture_uuid> endpoint.
# They are declared bottom-up because each model nests the previous one.

# One node of the redirect chain: its URL, hostname and IP address, all as strings.
comparables_nodes_model = api.model('ComparablesNodeModel', {
'url': fields.String,
'hostname': fields.String,
'ip_address': fields.String,
})
# The redirect chain: the number of redirects plus the list of nodes.
redirects_model = api.model('RedirectsModel', {
'length': fields.Integer,
'nodes': fields.List(fields.Nested(comparables_nodes_model)),
})
# Top-level payload used to marshal the response of the Comparables resource.
comparables_model = api.model('ComparablesModel', {
'root_url': fields.String,
'final_url': fields.String,
'final_hostname': fields.String,
'final_status_code': fields.Integer,
'redirects': fields.Nested(redirects_model),
# A list of string lists; presumably each inner list is a (URL, hostname)
# pair as produced by the comparator -- TODO confirm against
# Comparator.get_comparables_capture.
'ressources': fields.List(fields.List(fields.String)),
})
# REST resource exposing, for a single capture, the data the comparator
# uses when comparing captures. Documentation is kept in comments (not
# docstrings) so the generated API description stays exactly as declared
# in the @api.doc decorator.
@api.route('/json/comparables/<string:capture_uuid>')
@api.doc(description='Get the data we can compare across captures')
class Comparables(Resource):

    @api.marshal_with(comparables_model)
    def get(self, capture_uuid: str):
        # capture_uuid comes from the URL path; the comparator's result is
        # returned as-is and shaped for the response by comparables_model.
        comparables = comparator.get_comparables_capture(capture_uuid)
        return comparables
# Get information for takedown # Get information for takedown
takedown_fields = api.model('TakedownFields', { takedown_fields = api.model('TakedownFields', {