mirror of https://github.com/CIRCL/lookyloo
chg: add stats, avoid building big trees twice, bump deps
parent
315a2733c3
commit
1117ab6371
|
@ -2,6 +2,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
from lookyloo.abstractmanager import AbstractManager
|
from lookyloo.abstractmanager import AbstractManager
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo
|
||||||
|
@ -29,6 +30,19 @@ class BackgroundIndexer(AbstractManager):
|
||||||
for uuid_path in self.lookyloo.capture_dir.glob('*/uuid'):
|
for uuid_path in self.lookyloo.capture_dir.glob('*/uuid'):
|
||||||
if (uuid_path.parent / 'tree.pickle').exists():
|
if (uuid_path.parent / 'tree.pickle').exists():
|
||||||
continue
|
continue
|
||||||
|
lock_file = uuid_path.parent / 'lock'
|
||||||
|
if lock_file.exists():
|
||||||
|
try:
|
||||||
|
with lock_file.open('r') as f:
|
||||||
|
lock_ts = datetime.fromisoformat(f.read())
|
||||||
|
if lock_ts < datetime.now() - timedelta(minutes=5):
|
||||||
|
# Clear old locks. They shouldn't be there, but it's gonna happen.
|
||||||
|
self.logger.info(f'Old lock found {lock_file}, removing it.')
|
||||||
|
lock_file.unlink(missing_ok=True)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.info(f'Error while reading lock {lock_file}: {e}')
|
||||||
|
continue
|
||||||
|
|
||||||
with uuid_path.open() as f:
|
with uuid_path.open() as f:
|
||||||
uuid = f.read()
|
uuid = f.read()
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -340,3 +340,11 @@ def uniq_domains(uniq_urls):
|
||||||
splitted = urlparse(url)
|
splitted = urlparse(url)
|
||||||
domains.add(splitted.hostname)
|
domains.add(splitted.hostname)
|
||||||
return domains
|
return domains
|
||||||
|
|
||||||
|
|
||||||
|
def try_make_file(filename: Path) -> bool:
    """Create *filename* as a new, empty file and report whether this call made it.

    Only the caller that receives ``True`` actually created the file, which
    makes the helper usable as a simple "who got there first" marker.

    Args:
        filename: Path of the file to create.

    Returns:
        ``True`` if the file did not exist and was created by this call,
        ``False`` if it already existed.
    """
    try:
        # exist_ok=False makes touch() raise FileExistsError instead of
        # silently succeeding when the file is already present.
        filename.touch(exist_ok=False)
    except FileExistsError:
        return False
    return True
|
||||||
|
|
|
@ -20,6 +20,7 @@ from urllib.parse import urlsplit, urljoin
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
import operator
|
import operator
|
||||||
|
import time
|
||||||
|
|
||||||
from defang import refang # type: ignore
|
from defang import refang # type: ignore
|
||||||
import dns.resolver
|
import dns.resolver
|
||||||
|
@ -38,7 +39,7 @@ from .exceptions import NoValidHarFile, MissingUUID, LookylooException
|
||||||
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
|
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
|
||||||
safe_create_dir, get_email_template, load_pickle_tree,
|
safe_create_dir, get_email_template, load_pickle_tree,
|
||||||
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
||||||
CaptureStatus)
|
CaptureStatus, try_make_file)
|
||||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois
|
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois
|
||||||
from .capturecache import CaptureCache
|
from .capturecache import CaptureCache
|
||||||
from .context import Context
|
from .context import Context
|
||||||
|
@ -149,6 +150,25 @@ class Lookyloo():
|
||||||
'''Generate the pickle, set the cache, add capture in the indexes'''
|
'''Generate the pickle, set the cache, add capture in the indexes'''
|
||||||
capture_dir = self._get_capture_dir(capture_uuid)
|
capture_dir = self._get_capture_dir(capture_uuid)
|
||||||
har_files = sorted(capture_dir.glob('*.har'))
|
har_files = sorted(capture_dir.glob('*.har'))
|
||||||
|
lock_file = capture_dir / 'lock'
|
||||||
|
pickle_file = capture_dir / 'tree.pickle'
|
||||||
|
|
||||||
|
if try_make_file(lock_file):
|
||||||
|
# Lock created, we can process
|
||||||
|
with lock_file.open('w') as f:
|
||||||
|
f.write(datetime.now().isoformat())
|
||||||
|
else:
|
||||||
|
# The pickle is being created somewhere else, wait until it's done.
|
||||||
|
while lock_file.exists():
|
||||||
|
time.sleep(5)
|
||||||
|
keep_going = 5
|
||||||
|
while (ct := load_pickle_tree(capture_dir)) is None:
|
||||||
|
keep_going -= 1
|
||||||
|
if not keep_going:
|
||||||
|
raise LookylooException(f'Unable to get tree for {capture_uuid}')
|
||||||
|
time.sleep(5)
|
||||||
|
return ct
|
||||||
|
|
||||||
# NOTE: We only index the public captures
|
# NOTE: We only index the public captures
|
||||||
index = True
|
index = True
|
||||||
try:
|
try:
|
||||||
|
@ -175,7 +195,7 @@ class Lookyloo():
|
||||||
except RecursionError as e:
|
except RecursionError as e:
|
||||||
raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
|
raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
|
||||||
|
|
||||||
with (capture_dir / 'tree.pickle').open('wb') as _p:
|
with pickle_file.open('wb') as _p:
|
||||||
# Some pickles require a pretty high recursion limit, this kindof fixes it.
|
# Some pickles require a pretty high recursion limit, this kindof fixes it.
|
||||||
# If the capture is really broken (generally a refresh to self), the capture
|
# If the capture is really broken (generally a refresh to self), the capture
|
||||||
# is discarded in the RecursionError above.
|
# is discarded in the RecursionError above.
|
||||||
|
@ -183,6 +203,7 @@ class Lookyloo():
|
||||||
sys.setrecursionlimit(int(default_recursion_limit * 1.1))
|
sys.setrecursionlimit(int(default_recursion_limit * 1.1))
|
||||||
pickle.dump(ct, _p)
|
pickle.dump(ct, _p)
|
||||||
sys.setrecursionlimit(default_recursion_limit)
|
sys.setrecursionlimit(default_recursion_limit)
|
||||||
|
lock_file.unlink(missing_ok=True)
|
||||||
return ct
|
return ct
|
||||||
|
|
||||||
def _build_cname_chain(self, known_cnames: Dict[str, Optional[str]], hostname) -> List[str]:
|
def _build_cname_chain(self, known_cnames: Dict[str, Optional[str]], hostname) -> List[str]:
|
||||||
|
|
|
@ -166,6 +166,8 @@ class UniversalWhois():
|
||||||
self.query_whois_hostnode(n)
|
self.query_whois_hostnode(n)
|
||||||
|
|
||||||
def whois(self, query: str) -> str:
|
def whois(self, query: str) -> str:
|
||||||
|
if not self.available:
|
||||||
|
return ''
|
||||||
bytes_whois = b''
|
bytes_whois = b''
|
||||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
||||||
sock.connect((self.server, self.port))
|
sock.connect((self.server, self.port))
|
||||||
|
|
|
@ -307,7 +307,7 @@ hyperframe = ">=5.2.0,<6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "har2tree"
|
name = "har2tree"
|
||||||
version = "1.6.0"
|
version = "1.6.1"
|
||||||
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
|
@ -1059,16 +1059,16 @@ python-versions = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "urllib3"
|
name = "urllib3"
|
||||||
version = "1.26.4"
|
version = "1.26.5"
|
||||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||||
category = "main"
|
category = "main"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
|
brotli = ["brotlipy (>=0.6.0)"]
|
||||||
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
|
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
|
||||||
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||||
brotli = ["brotlipy (>=0.6.0)"]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vt-py"
|
name = "vt-py"
|
||||||
|
@ -1366,8 +1366,8 @@ h2 = [
|
||||||
{file = "h2-3.2.0.tar.gz", hash = "sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14"},
|
{file = "h2-3.2.0.tar.gz", hash = "sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14"},
|
||||||
]
|
]
|
||||||
har2tree = [
|
har2tree = [
|
||||||
{file = "har2tree-1.6.0-py3-none-any.whl", hash = "sha256:8d4469ddea36da12ec7b25fc098740cfd66e2e565b8a013415261784eaa82cf4"},
|
{file = "har2tree-1.6.1-py3-none-any.whl", hash = "sha256:2db656b47986a682f46c3bdcbe928ff4048c1b8ccbdc557306de368518584f59"},
|
||||||
{file = "har2tree-1.6.0.tar.gz", hash = "sha256:572b85b5470fd544152c2b1bb41cf1257f4256e2a7a1936bcd6fb06a8f7784f2"},
|
{file = "har2tree-1.6.1.tar.gz", hash = "sha256:f34627f80aa155e28c920d17d3390d15fe71874652e0d54b52025dede5f50030"},
|
||||||
]
|
]
|
||||||
hpack = [
|
hpack = [
|
||||||
{file = "hpack-3.0.0-py2.py3-none-any.whl", hash = "sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89"},
|
{file = "hpack-3.0.0-py2.py3-none-any.whl", hash = "sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89"},
|
||||||
|
@ -1878,8 +1878,8 @@ typing-extensions = [
|
||||||
{file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
|
{file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
|
||||||
]
|
]
|
||||||
urllib3 = [
|
urllib3 = [
|
||||||
{file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"},
|
{file = "urllib3-1.26.5-py2.py3-none-any.whl", hash = "sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c"},
|
||||||
{file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"},
|
{file = "urllib3-1.26.5.tar.gz", hash = "sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098"},
|
||||||
]
|
]
|
||||||
vt-py = [
|
vt-py = [
|
||||||
{file = "vt-py-0.6.3.tar.gz", hash = "sha256:172916d07b54927271e62dd3ead03142189d7431e9ec0fdbb75fe09f68efa888"},
|
{file = "vt-py-0.6.3.tar.gz", hash = "sha256:172916d07b54927271e62dd3ead03142189d7431e9ec0fdbb75fe09f68efa888"},
|
||||||
|
|
|
@ -47,7 +47,7 @@ vt-py = "^0.6.2"
|
||||||
pyeupi = "^1.1"
|
pyeupi = "^1.1"
|
||||||
scrapysplashwrapper = "^1.6.0"
|
scrapysplashwrapper = "^1.6.0"
|
||||||
pysanejs = "^1.4"
|
pysanejs = "^1.4"
|
||||||
har2tree = "^1.6.0"
|
har2tree = "^1.6.1"
|
||||||
pylookyloo = "^1.6"
|
pylookyloo = "^1.6"
|
||||||
dnspython = "^2.1.0"
|
dnspython = "^2.1.0"
|
||||||
pytaxonomies = "^1.3"
|
pytaxonomies = "^1.3"
|
||||||
|
|
|
@ -110,7 +110,7 @@
|
||||||
{% for url in urls %}
|
{% for url in urls %}
|
||||||
{# URL Display #}
|
{# URL Display #}
|
||||||
<li class="list-group-item">
|
<li class="list-group-item">
|
||||||
<div class="h3" title={{ url['url_object'].name }}>
|
<div class="h3" title="{{ url['url_object'].name }}">
|
||||||
{# HTTPs or not #}
|
{# HTTPs or not #}
|
||||||
{% if url['encrypted'] %}
|
{% if url['encrypted'] %}
|
||||||
<img src="/static/secure.svg" title="Encrypted request" width="21" height="21"/>
|
<img src="/static/secure.svg" title="Encrypted request" width="21" height="21"/>
|
||||||
|
@ -162,7 +162,11 @@
|
||||||
{# Details of the response #}
|
{# Details of the response #}
|
||||||
<p class="h4">Response
|
<p class="h4">Response
|
||||||
<small>(Status code:
|
<small>(Status code:
|
||||||
<span title="{{ http_status_description(url['url_object'].response['status']) }}">{{ url['url_object'].response['status'] }})</span>
|
<span title="{{ http_status_description(url['url_object'].response['status']) }}">
|
||||||
|
{{ url['url_object'].response['status'] }})
|
||||||
|
</span>
|
||||||
|
-
|
||||||
|
<span>Load time: {{ url['url_object'].time.total_seconds() }}s</span>
|
||||||
</small>
|
</small>
|
||||||
</p>
|
</p>
|
||||||
{{ popup_icons(keys_response, url['url_object'], tree_uuid) }}
|
{{ popup_icons(keys_response, url['url_object'], tree_uuid) }}
|
||||||
|
|
|
@ -28,5 +28,11 @@
|
||||||
|
|
||||||
<dt class="col-sm-2">Total Nodes</dt>
|
<dt class="col-sm-2">Total Nodes</dt>
|
||||||
<dd class="col-sm-10">{{ stats['total_hostnames'] }}</dd>
|
<dd class="col-sm-10">{{ stats['total_hostnames'] }}</dd>
|
||||||
|
|
||||||
|
<dt class="col-sm-2">Total load time</dt>
|
||||||
|
<dd class="col-sm-10">{{ stats['total_load_time'] }}</dd>
|
||||||
|
|
||||||
|
<dt class="col-sm-2">Total size responses</dt>
|
||||||
|
<dd class="col-sm-10">{{ sizeof_fmt(stats['total_size_responses']) }}</dd>
|
||||||
</dl>
|
</dl>
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Reference in New Issue