new: Endpoint to get comparables from a capture

pull/671/head
Raphaël Vinot 2023-04-24 16:25:29 +02:00
parent b33ef171a4
commit c62032fee2
2 changed files with 39 additions and 10 deletions

View File

@ -85,6 +85,7 @@ class Comparator():
'redirects': {'length': len(capture.tree.redirects)}}
to_return['redirects']['nodes'] = [self.get_comparables_node(a) for a in list(reversed(capture.tree.root_hartree.rendered_node.get_ancestors())) + [capture.tree.root_hartree.rendered_node]]
to_return['ressources'] = {(a.name, a.hostname) for a in capture.tree.root_hartree.rendered_node.traverse()}
return to_return
def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Dict[str, Any]:
@ -143,9 +144,6 @@ class Comparator():
to_return['redirects']['nodes'].append(self._compare_nodes(redirect_left, redirect_right))
# Compare all ressources URLs
left_capture = self._captures_index[capture_left]
right_capture = self._captures_index[capture_right]
to_return['ressources'] = {}
_settings: Optional[CompareSettings]
if settings:
@ -158,14 +156,14 @@ class Comparator():
}
else:
_settings = None
ressources_left = {a.name for a in left_capture.tree.root_hartree.rendered_node.traverse()
ressources_left = {url for url, hostname in left['ressources']
if not _settings
or not a.hostname.endswith(_settings['ressources_ignore_domains'])
or not any(fnmatch.fnmatch(a.name, regex) for regex in _settings['ressources_ignore_regexes'])}
ressources_right = {a.name for a in right_capture.tree.root_hartree.rendered_node.traverse() if not settings
or not hostname.endswith(_settings['ressources_ignore_domains'])
or not any(fnmatch.fnmatch(url, regex) for regex in _settings['ressources_ignore_regexes'])}
ressources_right = {url for url, hostname in right['ressources']
if not _settings
or not a.hostname.endswith(_settings['ressources_ignore_domains'])
or not any(fnmatch.fnmatch(a.name, regex) for regex in _settings['ressources_ignore_regexes'])}
or not hostname.endswith(_settings['ressources_ignore_domains'])
or not any(fnmatch.fnmatch(url, regex) for regex in _settings['ressources_ignore_regexes'])}
if present_in_both := ressources_left & ressources_right:
to_return['ressources']['both'] = sorted(present_in_both)
if present_left := ressources_left - ressources_right:

View File

@ -12,9 +12,9 @@ from werkzeug.security import check_password_hash
from lacuscore import CaptureStatus as CaptureStatusCore
from pylacus import CaptureStatus as CaptureStatusPy
from lookyloo.lookyloo import Lookyloo
from lookyloo.comparator import Comparator
from lookyloo.exceptions import MissingUUID
from lookyloo.lookyloo import Lookyloo
from .helpers import build_users_table, load_user_from_request, src_request_ip
@ -479,6 +479,37 @@ class CompareCaptures(Resource):
return result
# Response models for the comparables endpoint. They are nested bottom-up:
# node -> redirects -> comparables, so each model must be declared before the
# one that references it.

# One node of the redirect chain: its URL, hostname and IP address.
comparables_nodes_model = api.model('ComparablesNodeModel', {
'url': fields.String,
'hostname': fields.String,
'ip_address': fields.String,
})
# The redirect chain: a 'length' counter plus the list of nodes.
# NOTE(review): presumably 'length' is the number of redirects and 'nodes'
# runs from the first ancestor to the rendered node - confirm against the comparator.
redirects_model = api.model('RedirectsModel', {
'length': fields.Integer,
'nodes': fields.List(fields.Nested(comparables_nodes_model)),
})
# Top-level payload used to marshal the response of the Comparables resource.
comparables_model = api.model('ComparablesModel', {
'root_url': fields.String,
'final_url': fields.String,
'final_hostname': fields.String,
'final_status_code': fields.Integer,
'redirects': fields.Nested(redirects_model),
# List of string lists; presumably one [url, hostname] pair per resource
# loaded by the capture - confirm against the comparator output.
# The key spelling ('ressources') is part of the API payload: do not change it.
'ressources': fields.List(fields.List(fields.String)),
})
@api.route('/json/comparables/<string:capture_uuid>')
@api.doc(description='Get the data we can compare across captures')
class Comparables(Resource):
    # REST resource returning the comparable attributes of a single capture.
    # Documentation is kept in '#' comments on purpose: flask-restx folds
    # method docstrings into the generated Swagger output.

    @api.marshal_with(comparables_model)
    def get(self, capture_uuid: str):
        # Delegate to the comparator; the decorator above marshals the result
        # with comparables_model before it is sent to the client.
        comparables = comparator.get_comparables_capture(capture_uuid)
        return comparables
# Get information for takedown
takedown_fields = api.model('TakedownFields', {