mirror of https://github.com/CIRCL/lookyloo
Merge branch 'main' into restx
commit
7ad9aa7bbc
|
@ -22,11 +22,6 @@ jobs:
|
||||||
# a pull request then we can checkout the head.
|
# a pull request then we can checkout the head.
|
||||||
fetch-depth: 2
|
fetch-depth: 2
|
||||||
|
|
||||||
# If this run was triggered by a pull request event, then checkout
|
|
||||||
# the head of the pull request instead of the merge commit.
|
|
||||||
- run: git checkout HEAD^2
|
|
||||||
if: ${{ github.event_name == 'pull_request' }}
|
|
||||||
|
|
||||||
# Initializes the CodeQL tools for scanning.
|
# Initializes the CodeQL tools for scanning.
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v1
|
uses: github/codeql-action/init@v1
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -10,13 +9,13 @@ from typing import Any, Dict, List, Optional, Tuple
|
||||||
from .exceptions import LookylooException
|
from .exceptions import LookylooException
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class CaptureCache():
|
class CaptureCache():
|
||||||
__default_cache_keys: Tuple[str, str, str, str, str, str] = \
|
__slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
|
||||||
('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir')
|
'error', 'incomplete_redirects', 'no_index', 'categories', 'parent')
|
||||||
|
|
||||||
def __init__(self, cache_entry: Dict[str, Any]):
|
def __init__(self, cache_entry: Dict[str, Any]):
|
||||||
if all(key in cache_entry.keys() for key in self.__default_cache_keys):
|
__default_cache_keys: Tuple[str, str, str, str, str, str] = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir')
|
||||||
|
if all(key in cache_entry.keys() for key in __default_cache_keys):
|
||||||
self.uuid: str = cache_entry['uuid']
|
self.uuid: str = cache_entry['uuid']
|
||||||
self.title: str = cache_entry['title']
|
self.title: str = cache_entry['title']
|
||||||
self.timestamp: datetime = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S.%f%z')
|
self.timestamp: datetime = datetime.strptime(cache_entry['timestamp'], '%Y-%m-%dT%H:%M:%S.%f%z')
|
||||||
|
@ -24,7 +23,7 @@ class CaptureCache():
|
||||||
self.redirects: List[str] = json.loads(cache_entry['redirects'])
|
self.redirects: List[str] = json.loads(cache_entry['redirects'])
|
||||||
self.capture_dir: Path = Path(cache_entry['capture_dir'])
|
self.capture_dir: Path = Path(cache_entry['capture_dir'])
|
||||||
elif not cache_entry.get('error'):
|
elif not cache_entry.get('error'):
|
||||||
missing = set(self.__default_cache_keys) - set(cache_entry.keys())
|
missing = set(__default_cache_keys) - set(cache_entry.keys())
|
||||||
raise LookylooException(f'Missing keys ({missing}), no error message. It should not happen.')
|
raise LookylooException(f'Missing keys ({missing}), no error message. It should not happen.')
|
||||||
|
|
||||||
# Error without all the keys in __default_cache_keys was fatal.
|
# Error without all the keys in __default_cache_keys was fatal.
|
||||||
|
|
|
@ -33,7 +33,7 @@ class Context():
|
||||||
p = self.redis.pipeline()
|
p = self.redis.pipeline()
|
||||||
if filename == 'generic':
|
if filename == 'generic':
|
||||||
# 1px images, files with spaces, empty => non-relevant stuff
|
# 1px images, files with spaces, empty => non-relevant stuff
|
||||||
for k, type_content in file_content.items():
|
for _, type_content in file_content.items():
|
||||||
p.hmset('known_content', {h: type_content['description'] for h in type_content['entries']})
|
p.hmset('known_content', {h: type_content['description'] for h in type_content['entries']})
|
||||||
elif filename == 'malicious':
|
elif filename == 'malicious':
|
||||||
# User defined as malicious
|
# User defined as malicious
|
||||||
|
@ -133,7 +133,7 @@ class Context():
|
||||||
# this is the hash of embedded content, so it won't have a filename but has a different mimetype
|
# this is the hash of embedded content, so it won't have a filename but has a different mimetype
|
||||||
# FIXME: this is ugly.
|
# FIXME: this is ugly.
|
||||||
for ressource_mimetype, blobs in urlnode.embedded_ressources.items():
|
for ressource_mimetype, blobs in urlnode.embedded_ressources.items():
|
||||||
for ressource_h, b in blobs:
|
for ressource_h, _ in blobs:
|
||||||
if ressource_h == h:
|
if ressource_h == h:
|
||||||
mimetype = ressource_mimetype.split(';')[0]
|
mimetype = ressource_mimetype.split(';')[0]
|
||||||
break
|
break
|
||||||
|
|
|
@ -128,7 +128,7 @@ class Lookyloo():
|
||||||
|
|
||||||
to_store: Dict[str, Any] = {'by_frequency': []}
|
to_store: Dict[str, Any] = {'by_frequency': []}
|
||||||
uas = Counter([entry.split('|', 1)[1] for entry in entries])
|
uas = Counter([entry.split('|', 1)[1] for entry in entries])
|
||||||
for ua, count in uas.most_common():
|
for ua, _ in uas.most_common():
|
||||||
parsed_ua = UserAgent(ua)
|
parsed_ua = UserAgent(ua)
|
||||||
if not parsed_ua.platform or not parsed_ua.browser:
|
if not parsed_ua.platform or not parsed_ua.browser:
|
||||||
continue
|
continue
|
||||||
|
@ -191,7 +191,7 @@ class Lookyloo():
|
||||||
categories = list(self.categories_capture(capture_uuid).keys())
|
categories = list(self.categories_capture(capture_uuid).keys())
|
||||||
self.indexing.index_categories_capture(capture_uuid, categories)
|
self.indexing.index_categories_capture(capture_uuid, categories)
|
||||||
except Har2TreeError as e:
|
except Har2TreeError as e:
|
||||||
raise NoValidHarFile(e.message)
|
raise NoValidHarFile(e)
|
||||||
except RecursionError as e:
|
except RecursionError as e:
|
||||||
raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
|
raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
|
||||||
else:
|
else:
|
||||||
|
@ -471,7 +471,7 @@ class Lookyloo():
|
||||||
try:
|
try:
|
||||||
har = HarFile(har_files[0], uuid)
|
har = HarFile(har_files[0], uuid)
|
||||||
except Har2TreeError as e:
|
except Har2TreeError as e:
|
||||||
error_cache['error'] = e.message
|
error_cache['error'] = str(e)
|
||||||
fatal_error = True
|
fatal_error = True
|
||||||
else:
|
else:
|
||||||
error_cache['error'] = f'No har files in {capture_dir.name}'
|
error_cache['error'] = f'No har files in {capture_dir.name}'
|
||||||
|
@ -541,7 +541,7 @@ class Lookyloo():
|
||||||
'''All the capture UUIDs present in the cache.'''
|
'''All the capture UUIDs present in the cache.'''
|
||||||
return self.redis.hkeys('lookup_dirs')
|
return self.redis.hkeys('lookup_dirs')
|
||||||
|
|
||||||
def sorted_capture_cache(self, capture_uuids: Iterable[str]=[]) -> List[CaptureCache]:
|
def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None) -> List[CaptureCache]:
|
||||||
'''Get all the captures in the cache, sorted by timestamp (new -> old).'''
|
'''Get all the captures in the cache, sorted by timestamp (new -> old).'''
|
||||||
if not capture_uuids:
|
if not capture_uuids:
|
||||||
# Sort all captures
|
# Sort all captures
|
||||||
|
@ -550,7 +550,7 @@ class Lookyloo():
|
||||||
# No captures at all on the instance
|
# No captures at all on the instance
|
||||||
return []
|
return []
|
||||||
|
|
||||||
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if uuid in self._captures_index]
|
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if uuid in self._captures_index and not self._captures_index[uuid].incomplete_redirects]
|
||||||
|
|
||||||
captures_to_get = set(capture_uuids) - set(self._captures_index.keys())
|
captures_to_get = set(capture_uuids) - set(self._captures_index.keys())
|
||||||
if captures_to_get:
|
if captures_to_get:
|
||||||
|
@ -954,7 +954,7 @@ class Lookyloo():
|
||||||
details = self.indexing.get_body_hash_urls(body_hash)
|
details = self.indexing.get_body_hash_urls(body_hash)
|
||||||
body_content = BytesIO()
|
body_content = BytesIO()
|
||||||
# get the body from the first entry in the details list
|
# get the body from the first entry in the details list
|
||||||
for url, entries in details.items():
|
for _, entries in details.items():
|
||||||
ct = self.get_crawled_tree(entries[0]['capture'])
|
ct = self.get_crawled_tree(entries[0]['capture'])
|
||||||
urlnode = ct.root_hartree.get_url_node_by_uuid(entries[0]['urlnode'])
|
urlnode = ct.root_hartree.get_url_node_by_uuid(entries[0]['urlnode'])
|
||||||
if urlnode.body_hash == body_hash:
|
if urlnode.body_hash == body_hash:
|
||||||
|
@ -962,7 +962,7 @@ class Lookyloo():
|
||||||
body_content = urlnode.body
|
body_content = urlnode.body
|
||||||
else:
|
else:
|
||||||
# The hash is an embedded resource
|
# The hash is an embedded resource
|
||||||
for mimetype, blobs in urlnode.body_hash.embedded_ressources.items():
|
for _, blobs in urlnode.body_hash.embedded_ressources.items():
|
||||||
for h, b in blobs:
|
for h, b in blobs:
|
||||||
if h == body_hash:
|
if h == body_hash:
|
||||||
body_content = b
|
body_content = b
|
||||||
|
|
|
@ -363,7 +363,7 @@ class PhishingInitiative():
|
||||||
if not force and pi_file.exists():
|
if not force and pi_file.exists():
|
||||||
return
|
return
|
||||||
|
|
||||||
for i in range(3):
|
for _ in range(3):
|
||||||
url_information = self.client.lookup(url)
|
url_information = self.client.lookup(url)
|
||||||
if not url_information['results']:
|
if not url_information['results']:
|
||||||
# No results, that should not happen (?)
|
# No results, that should not happen (?)
|
||||||
|
@ -457,7 +457,7 @@ class VirusTotal():
|
||||||
if not force and vt_file.exists():
|
if not force and vt_file.exists():
|
||||||
return
|
return
|
||||||
|
|
||||||
for i in range(3):
|
for _ in range(3):
|
||||||
try:
|
try:
|
||||||
url_information = self.client.get_object(f"/urls/{url_id}")
|
url_information = self.client.get_object(f"/urls/{url_id}")
|
||||||
with vt_file.open('w') as _f:
|
with vt_file.open('w') as _f:
|
||||||
|
|
|
@ -1122,7 +1122,7 @@ types-MarkupSafe = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "types-markupsafe"
|
name = "types-markupsafe"
|
||||||
version = "1.1.1"
|
version = "1.1.2"
|
||||||
description = "Typing stubs for MarkupSafe"
|
description = "Typing stubs for MarkupSafe"
|
||||||
category = "dev"
|
category = "dev"
|
||||||
optional = false
|
optional = false
|
||||||
|
@ -1979,7 +1979,7 @@ types-jinja2 = [
|
||||||
{file = "types_Jinja2-2.11.1-py2.py3-none-any.whl", hash = "sha256:84f66a2612376ff3c1656198b10a04c7711b6d917d4f5f7f2c2f7ecec0afa040"},
|
{file = "types_Jinja2-2.11.1-py2.py3-none-any.whl", hash = "sha256:84f66a2612376ff3c1656198b10a04c7711b6d917d4f5f7f2c2f7ecec0afa040"},
|
||||||
]
|
]
|
||||||
types-markupsafe = [
|
types-markupsafe = [
|
||||||
{file = "types_MarkupSafe-1.1.1-py2.py3-none-any.whl", hash = "sha256:a2c32269a26b4a7205f6f1581bd37cfbcd390297352b828a9643978392239516"},
|
{file = "types_MarkupSafe-1.1.2-py2.py3-none-any.whl", hash = "sha256:b5e311cb6aad7f6da0bb1455494305e2bb7941b04c3c8cf9ed7bbd33cf8ba374"},
|
||||||
]
|
]
|
||||||
types-pkg-resources = [
|
types-pkg-resources = [
|
||||||
{file = "types_pkg_resources-0.1.2-py2.py3-none-any.whl", hash = "sha256:42d640500de564f1ccc21f918117afadf78039e4fa7f513c647ccf742d609aeb"},
|
{file = "types_pkg_resources-0.1.2-py2.py3-none-any.whl", hash = "sha256:42d640500de564f1ccc21f918117afadf78039e4fa7f513c647ccf742d609aeb"},
|
||||||
|
|
Loading…
Reference in New Issue