fix: [tree] fix icons rendering

pull/1017/head
Raphaël Vinot 2024-12-13 23:00:22 +01:00
parent 9c871d8cb7
commit 019f16e48f
No known key found for this signature in database
GPG Key ID: 32E4E1C133B3792F
6 changed files with 72 additions and 64 deletions

View File

@ -32,7 +32,7 @@ from pyipasnhistory import IPASNHistory # type: ignore[attr-defined]
from redis import Redis
from .context import Context
from .helpers import get_captures_dir, is_locked, load_pickle_tree, get_pickle_path, remove_pickle_tree, get_indexing
from .helpers import get_captures_dir, is_locked, load_pickle_tree, get_pickle_path, remove_pickle_tree, get_indexing, mimetype_to_generic
from .default import LookylooException, try_make_file, get_config
from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, TreeNeedsRebuild
from .modules import Cloudflare
@ -291,6 +291,14 @@ class CapturesIndex(Mapping): # type: ignore[type-arg]
default_recursion_limit = sys.getrecursionlimit()
with self._timeout_context():
tree = CrawledTree(har_files, uuid)
for node in tree.root_hartree.hostname_tree.traverse():
for url in node.urls:
if 'mimetype' in url.features:
generic_type = mimetype_to_generic(url.mimetype)
if generic_type not in node.features:
node.add_feature(generic_type, 1)
else:
node.add_feature(generic_type, getattr(node, generic_type) + 1)
await self.__resolve_dns(tree, logger)
if self.contextualizer:
self.contextualizer.contextualize_tree(tree)

View File

@ -511,3 +511,61 @@ def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> C
raise TreeNeedsRebuild('We have HAR files and need to rebuild the tree.')
# The tree doesn't need to be rebuilt if there are no HAR files.
raise NoValidHarFile("Couldn't find HAR files")
def mimetype_to_generic(mimetype: str | None) -> str:
if not mimetype or mimetype == 'none':
return 'unset_mimetype'
elif 'javascript' in mimetype or 'ecmascript' in mimetype or mimetype.startswith('js'):
return 'js'
elif (mimetype.startswith('image')
or mimetype.startswith('img')
or 'webp' in mimetype):
return 'image'
elif mimetype.startswith('text/css'):
return 'css'
elif 'json' in mimetype:
return 'json'
elif 'html' in mimetype:
return 'html'
elif ('font' in mimetype
or 'woff' in mimetype
or 'opentype' in mimetype):
return 'font'
elif ('octet-stream' in mimetype
or 'application/x-protobuf' in mimetype
or 'application/pkix-cert' in mimetype
or 'application/x-123' in mimetype
or 'application/x-binary' in mimetype
or 'application/x-msdownload' in mimetype
or 'application/x-thrift' in mimetype
or 'application/x-troff-man' in mimetype
or 'application/x-typekit-augmentation' in mimetype
or 'application/grpc-web' in mimetype
or 'model/gltf-binary' in mimetype
or 'model/obj' in mimetype
or 'application/wasm' in mimetype):
return 'octet-stream'
elif ('text' in mimetype or 'xml' in mimetype
or mimetype.startswith('multipart')
or mimetype.startswith('message')
or 'application/x-www-form-urlencoded' in mimetype
or 'application/vnd.oasis.opendocument.formula-template' in mimetype):
return 'text'
elif 'video' in mimetype:
return 'video'
elif ('audio' in mimetype or 'ogg' in mimetype):
return 'audio'
elif ('mpegurl' in mimetype
or 'application/vnd.yt-ump' in mimetype):
return 'livestream'
elif ('application/x-shockwave-flash' in mimetype
or 'application/x-shockware-flash' in mimetype): # Yes, shockwaRe
return 'flash'
elif 'application/pdf' in mimetype:
return 'pdf'
elif ('application/gzip' in mimetype
or 'application/zip' in mimetype):
return 'archive'
else:
return 'unknown_mimetype'

View File

@ -45,7 +45,8 @@ from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable
from lookyloo.helpers import (UserAgents, load_cookies,
load_user_config,
get_taxonomies
get_taxonomies,
mimetype_to_generic
)
from zoneinfo import available_timezones
@ -53,8 +54,7 @@ from zoneinfo import available_timezones
from .genericapi import api as generic_api
from .helpers import (User, build_users_table, get_secret_key,
load_user_from_request, src_request_ip, sri_load,
get_lookyloo_instance, get_indexing, build_keys_table,
mimetype_to_generic)
get_lookyloo_instance, get_indexing, build_keys_table)
from .proxied import ReverseProxied
logging.config.dictConfig(get_config('logging'))

View File

@ -855,7 +855,7 @@ class TLDCaptures(Resource): # type: ignore[misc]
@api.param('tld', 'Get captures with a specific TLD and their capture timestamp.') # type: ignore[misc]
@api.param('urls_only', 'Returns recent URLs with that TLD, regardless the capture.') # type: ignore[misc]
@api.param('most_recent_capture', 'Timestamp of the most recent capture to check for a TLD (fallback to now)') # type: ignore[misc]
@api.param('oldest_capture', 'Timestamp of the oldest capture to check for a TLD (fallback to 5 days ago)') # type: ignore[misc]
@api.param('oldest_capture', 'Timestamp of the oldest capture to check for a TLD (fallback to 1 day ago)') # type: ignore[misc]
def get(self) -> list[tuple[str, float]] | list[str]:
tld: str | None = request.args['tld'] if request.args.get('tld') else None
if not tld:

View File

@ -126,61 +126,3 @@ def get_indexing(user: User | None) -> Indexing:
It is only accessible to the admin user.
'''
return get_indexing_cache(full=bool(user and user.is_authenticated))
def mimetype_to_generic(mimetype: str | None) -> str:
if not mimetype or mimetype == 'none':
return 'unset_mimetype'
elif 'javascript' in mimetype or 'ecmascript' in mimetype or mimetype.startswith('js'):
return 'js'
elif (mimetype.startswith('image')
or mimetype.startswith('img')
or 'webp' in mimetype):
return 'image'
elif mimetype.startswith('text/css'):
return 'css'
elif 'json' in mimetype:
return 'json'
elif 'html' in mimetype:
return 'html'
elif ('font' in mimetype
or 'woff' in mimetype
or 'opentype' in mimetype):
return 'font'
elif ('octet-stream' in mimetype
or 'application/x-protobuf' in mimetype
or 'application/pkix-cert' in mimetype
or 'application/x-123' in mimetype
or 'application/x-binary' in mimetype
or 'application/x-msdownload' in mimetype
or 'application/x-thrift' in mimetype
or 'application/x-troff-man' in mimetype
or 'application/x-typekit-augmentation' in mimetype
or 'application/grpc-web' in mimetype
or 'model/gltf-binary' in mimetype
or 'model/obj' in mimetype
or 'application/wasm' in mimetype):
return 'octet-stream'
elif ('text' in mimetype or 'xml' in mimetype
or mimetype.startswith('multipart')
or mimetype.startswith('message')
or 'application/x-www-form-urlencoded' in mimetype
or 'application/vnd.oasis.opendocument.formula-template' in mimetype):
return 'text'
elif 'video' in mimetype:
return 'video'
elif ('audio' in mimetype or 'ogg' in mimetype):
return 'audio'
elif ('mpegurl' in mimetype
or 'application/vnd.yt-ump' in mimetype):
return 'livestream'
elif ('application/x-shockwave-flash' in mimetype
or 'application/x-shockware-flash' in mimetype): # Yes, shockwaRe
return 'flash'
elif 'application/pdf' in mimetype:
return 'pdf'
elif ('application/gzip' in mimetype
or 'application/zip' in mimetype):
return 'archive'
else:
return 'unknown_mimetype'

View File

@ -289,7 +289,7 @@
{% for hash, details in url['embedded_ressources'].items() %}
<div>
{{hash_info(tree_uuid, url['url_object'].uuid, details['type'], hash,
details['body_size'], details['hash_freq'],
details['body_size'], details.get('hash_freq', 0),
has_pandora,
details.get('legitimacy'),
details.get('known_content')) }}<br>