mirror of https://github.com/CIRCL/lookyloo
new: Endpoint to get comparables from a capture
parent
b33ef171a4
commit
c62032fee2
|
@ -85,6 +85,7 @@ class Comparator():
|
||||||
'redirects': {'length': len(capture.tree.redirects)}}
|
'redirects': {'length': len(capture.tree.redirects)}}
|
||||||
|
|
||||||
to_return['redirects']['nodes'] = [self.get_comparables_node(a) for a in list(reversed(capture.tree.root_hartree.rendered_node.get_ancestors())) + [capture.tree.root_hartree.rendered_node]]
|
to_return['redirects']['nodes'] = [self.get_comparables_node(a) for a in list(reversed(capture.tree.root_hartree.rendered_node.get_ancestors())) + [capture.tree.root_hartree.rendered_node]]
|
||||||
|
to_return['ressources'] = {(a.name, a.hostname) for a in capture.tree.root_hartree.rendered_node.traverse()}
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Dict[str, Any]:
|
def compare_captures(self, capture_left: str, capture_right: str, /, *, settings: Optional[CompareSettings]=None) -> Dict[str, Any]:
|
||||||
|
@ -143,9 +144,6 @@ class Comparator():
|
||||||
to_return['redirects']['nodes'].append(self._compare_nodes(redirect_left, redirect_right))
|
to_return['redirects']['nodes'].append(self._compare_nodes(redirect_left, redirect_right))
|
||||||
|
|
||||||
# Compare all ressources URLs
|
# Compare all ressources URLs
|
||||||
left_capture = self._captures_index[capture_left]
|
|
||||||
right_capture = self._captures_index[capture_right]
|
|
||||||
|
|
||||||
to_return['ressources'] = {}
|
to_return['ressources'] = {}
|
||||||
_settings: Optional[CompareSettings]
|
_settings: Optional[CompareSettings]
|
||||||
if settings:
|
if settings:
|
||||||
|
@ -158,14 +156,14 @@ class Comparator():
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
_settings = None
|
_settings = None
|
||||||
ressources_left = {a.name for a in left_capture.tree.root_hartree.rendered_node.traverse()
|
ressources_left = {url for url, hostname in left['ressources']
|
||||||
if not _settings
|
if not _settings
|
||||||
or not a.hostname.endswith(_settings['ressources_ignore_domains'])
|
or not hostname.endswith(_settings['ressources_ignore_domains'])
|
||||||
or not any(fnmatch.fnmatch(a.name, regex) for regex in _settings['ressources_ignore_regexes'])}
|
or not any(fnmatch.fnmatch(url, regex) for regex in _settings['ressources_ignore_regexes'])}
|
||||||
ressources_right = {a.name for a in right_capture.tree.root_hartree.rendered_node.traverse() if not settings
|
ressources_right = {url for url, hostname in right['ressources']
|
||||||
if not _settings
|
if not _settings
|
||||||
or not a.hostname.endswith(_settings['ressources_ignore_domains'])
|
or not hostname.endswith(_settings['ressources_ignore_domains'])
|
||||||
or not any(fnmatch.fnmatch(a.name, regex) for regex in _settings['ressources_ignore_regexes'])}
|
or not any(fnmatch.fnmatch(url, regex) for regex in _settings['ressources_ignore_regexes'])}
|
||||||
if present_in_both := ressources_left & ressources_right:
|
if present_in_both := ressources_left & ressources_right:
|
||||||
to_return['ressources']['both'] = sorted(present_in_both)
|
to_return['ressources']['both'] = sorted(present_in_both)
|
||||||
if present_left := ressources_left - ressources_right:
|
if present_left := ressources_left - ressources_right:
|
||||||
|
|
|
@ -12,9 +12,9 @@ from werkzeug.security import check_password_hash
|
||||||
|
|
||||||
from lacuscore import CaptureStatus as CaptureStatusCore
|
from lacuscore import CaptureStatus as CaptureStatusCore
|
||||||
from pylacus import CaptureStatus as CaptureStatusPy
|
from pylacus import CaptureStatus as CaptureStatusPy
|
||||||
from lookyloo.lookyloo import Lookyloo
|
|
||||||
from lookyloo.comparator import Comparator
|
from lookyloo.comparator import Comparator
|
||||||
from lookyloo.exceptions import MissingUUID
|
from lookyloo.exceptions import MissingUUID
|
||||||
|
from lookyloo.lookyloo import Lookyloo
|
||||||
|
|
||||||
from .helpers import build_users_table, load_user_from_request, src_request_ip
|
from .helpers import build_users_table, load_user_from_request, src_request_ip
|
||||||
|
|
||||||
|
@ -479,6 +479,37 @@ class CompareCaptures(Resource):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
comparables_nodes_model = api.model('ComparablesNodeModel', {
|
||||||
|
'url': fields.String,
|
||||||
|
'hostname': fields.String,
|
||||||
|
'ip_address': fields.String,
|
||||||
|
})
|
||||||
|
|
||||||
|
redirects_model = api.model('RedirectsModel', {
|
||||||
|
'length': fields.Integer,
|
||||||
|
'nodes': fields.List(fields.Nested(comparables_nodes_model)),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
comparables_model = api.model('ComparablesModel', {
|
||||||
|
'root_url': fields.String,
|
||||||
|
'final_url': fields.String,
|
||||||
|
'final_hostname': fields.String,
|
||||||
|
'final_status_code': fields.Integer,
|
||||||
|
'redirects': fields.Nested(redirects_model),
|
||||||
|
'ressources': fields.List(fields.List(fields.String)),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@api.route('/json/comparables/<string:capture_uuid>')
|
||||||
|
@api.doc(description='Get the data we can compare across captures')
|
||||||
|
class Comparables(Resource):
|
||||||
|
|
||||||
|
@api.marshal_with(comparables_model)
|
||||||
|
def get(self, capture_uuid: str):
|
||||||
|
return comparator.get_comparables_capture(capture_uuid)
|
||||||
|
|
||||||
|
|
||||||
# Get information for takedown
|
# Get information for takedown
|
||||||
|
|
||||||
takedown_fields = api.model('TakedownFields', {
|
takedown_fields = api.model('TakedownFields', {
|
||||||
|
|
Loading…
Reference in New Issue