mirror of https://github.com/CIRCL/lookyloo
chg: add stats, avoid building big trees twice, bump deps
parent
315a2733c3
commit
1117ab6371
|
@ -2,6 +2,7 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from lookyloo.abstractmanager import AbstractManager
|
||||
from lookyloo.lookyloo import Lookyloo
|
||||
|
@ -29,6 +30,19 @@ class BackgroundIndexer(AbstractManager):
|
|||
for uuid_path in self.lookyloo.capture_dir.glob('*/uuid'):
|
||||
if (uuid_path.parent / 'tree.pickle').exists():
|
||||
continue
|
||||
lock_file = uuid_path.parent / 'lock'
|
||||
if lock_file.exists():
|
||||
try:
|
||||
with lock_file.open('r') as f:
|
||||
lock_ts = datetime.fromisoformat(f.read())
|
||||
if lock_ts < datetime.now() - timedelta(minutes=5):
|
||||
# Clear old locks. They shouldn't be there, but it's gonna happen.
|
||||
self.logger.info(f'Old lock found {lock_file}, removing it.')
|
||||
lock_file.unlink(missing_ok=True)
|
||||
except Exception as e:
|
||||
self.logger.info(f'Error while reading lock {lock_file}: {e}')
|
||||
continue
|
||||
|
||||
with uuid_path.open() as f:
|
||||
uuid = f.read()
|
||||
try:
|
||||
|
|
|
@ -340,3 +340,11 @@ def uniq_domains(uniq_urls):
|
|||
splitted = urlparse(url)
|
||||
domains.add(splitted.hostname)
|
||||
return domains
|
||||
|
||||
|
||||
def try_make_file(filename: Path) -> bool:
    """Create *filename* only if it does not exist yet.

    Callers in this change use it to claim a per-capture ``lock`` file: the
    caller that gets ``True`` back owns the lock, everyone else gets ``False``.

    :param filename: Path of the file to create.
    :return: ``True`` if this call created the file, ``False`` if the file
        was already present.
    :raises OSError: Any failure other than "file already exists" (for
        example a missing parent directory) is propagated unchanged.
    """
    try:
        # exist_ok=False turns "already there" into FileExistsError instead
        # of silently succeeding, which is what lets us report ownership.
        filename.touch(exist_ok=False)
    except FileExistsError:
        return False
    return True
|
||||
|
|
|
@ -20,6 +20,7 @@ from urllib.parse import urlsplit, urljoin
|
|||
from uuid import uuid4
|
||||
from zipfile import ZipFile
|
||||
import operator
|
||||
import time
|
||||
|
||||
from defang import refang # type: ignore
|
||||
import dns.resolver
|
||||
|
@ -38,7 +39,7 @@ from .exceptions import NoValidHarFile, MissingUUID, LookylooException
|
|||
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
|
||||
safe_create_dir, get_email_template, load_pickle_tree,
|
||||
remove_pickle_tree, get_resources_hashes, get_taxonomies, uniq_domains,
|
||||
CaptureStatus)
|
||||
CaptureStatus, try_make_file)
|
||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative, MISP, UniversalWhois
|
||||
from .capturecache import CaptureCache
|
||||
from .context import Context
|
||||
|
@ -149,6 +150,25 @@ class Lookyloo():
|
|||
'''Generate the pickle, set the cache, add capture in the indexes'''
|
||||
capture_dir = self._get_capture_dir(capture_uuid)
|
||||
har_files = sorted(capture_dir.glob('*.har'))
|
||||
lock_file = capture_dir / 'lock'
|
||||
pickle_file = capture_dir / 'tree.pickle'
|
||||
|
||||
if try_make_file(lock_file):
|
||||
# Lock created, we can process
|
||||
with lock_file.open('w') as f:
|
||||
f.write(datetime.now().isoformat())
|
||||
else:
|
||||
# The pickle is being created somewhere else, wait until it's done.
|
||||
while lock_file.exists():
|
||||
time.sleep(5)
|
||||
keep_going = 5
|
||||
while (ct := load_pickle_tree(capture_dir)) is None:
|
||||
keep_going -= 1
|
||||
if not keep_going:
|
||||
raise LookylooException(f'Unable to get tree for {capture_uuid}')
|
||||
time.sleep(5)
|
||||
return ct
|
||||
|
||||
# NOTE: We only index the public captures
|
||||
index = True
|
||||
try:
|
||||
|
@ -175,7 +195,7 @@ class Lookyloo():
|
|||
except RecursionError as e:
|
||||
raise NoValidHarFile(f'Tree too deep, probably a recursive refresh: {e}.\n Append /export to the URL to get the files.')
|
||||
|
||||
with (capture_dir / 'tree.pickle').open('wb') as _p:
|
||||
with pickle_file.open('wb') as _p:
|
||||
# Some pickles require a pretty high recursion limit, this kind of fixes it.
|
||||
# If the capture is really broken (generally a refresh to self), the capture
|
||||
# is discarded in the RecursionError above.
|
||||
|
@ -183,6 +203,7 @@ class Lookyloo():
|
|||
sys.setrecursionlimit(int(default_recursion_limit * 1.1))
|
||||
pickle.dump(ct, _p)
|
||||
sys.setrecursionlimit(default_recursion_limit)
|
||||
lock_file.unlink(missing_ok=True)
|
||||
return ct
|
||||
|
||||
def _build_cname_chain(self, known_cnames: Dict[str, Optional[str]], hostname) -> List[str]:
|
||||
|
|
|
@ -166,6 +166,8 @@ class UniversalWhois():
|
|||
self.query_whois_hostnode(n)
|
||||
|
||||
def whois(self, query: str) -> str:
|
||||
if not self.available:
|
||||
return ''
|
||||
bytes_whois = b''
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
|
||||
sock.connect((self.server, self.port))
|
||||
|
|
|
@ -307,7 +307,7 @@ hyperframe = ">=5.2.0,<6"
|
|||
|
||||
[[package]]
|
||||
name = "har2tree"
|
||||
version = "1.6.0"
|
||||
version = "1.6.1"
|
||||
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -1059,16 +1059,16 @@ python-versions = "*"
|
|||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "1.26.4"
|
||||
version = "1.26.5"
|
||||
description = "HTTP library with thread-safe connection pooling, file post, and more."
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
|
||||
|
||||
[package.extras]
|
||||
brotli = ["brotlipy (>=0.6.0)"]
|
||||
secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"]
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
|
||||
brotli = ["brotlipy (>=0.6.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "vt-py"
|
||||
|
@ -1366,8 +1366,8 @@ h2 = [
|
|||
{file = "h2-3.2.0.tar.gz", hash = "sha256:875f41ebd6f2c44781259005b157faed1a5031df3ae5aa7bcb4628a6c0782f14"},
|
||||
]
|
||||
har2tree = [
|
||||
{file = "har2tree-1.6.0-py3-none-any.whl", hash = "sha256:8d4469ddea36da12ec7b25fc098740cfd66e2e565b8a013415261784eaa82cf4"},
|
||||
{file = "har2tree-1.6.0.tar.gz", hash = "sha256:572b85b5470fd544152c2b1bb41cf1257f4256e2a7a1936bcd6fb06a8f7784f2"},
|
||||
{file = "har2tree-1.6.1-py3-none-any.whl", hash = "sha256:2db656b47986a682f46c3bdcbe928ff4048c1b8ccbdc557306de368518584f59"},
|
||||
{file = "har2tree-1.6.1.tar.gz", hash = "sha256:f34627f80aa155e28c920d17d3390d15fe71874652e0d54b52025dede5f50030"},
|
||||
]
|
||||
hpack = [
|
||||
{file = "hpack-3.0.0-py2.py3-none-any.whl", hash = "sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89"},
|
||||
|
@ -1878,8 +1878,8 @@ typing-extensions = [
|
|||
{file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"},
|
||||
]
|
||||
urllib3 = [
|
||||
{file = "urllib3-1.26.4-py2.py3-none-any.whl", hash = "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df"},
|
||||
{file = "urllib3-1.26.4.tar.gz", hash = "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937"},
|
||||
{file = "urllib3-1.26.5-py2.py3-none-any.whl", hash = "sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c"},
|
||||
{file = "urllib3-1.26.5.tar.gz", hash = "sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098"},
|
||||
]
|
||||
vt-py = [
|
||||
{file = "vt-py-0.6.3.tar.gz", hash = "sha256:172916d07b54927271e62dd3ead03142189d7431e9ec0fdbb75fe09f68efa888"},
|
||||
|
|
|
@ -47,7 +47,7 @@ vt-py = "^0.6.2"
|
|||
pyeupi = "^1.1"
|
||||
scrapysplashwrapper = "^1.6.0"
|
||||
pysanejs = "^1.4"
|
||||
har2tree = "^1.6.0"
|
||||
har2tree = "^1.6.1"
|
||||
pylookyloo = "^1.6"
|
||||
dnspython = "^2.1.0"
|
||||
pytaxonomies = "^1.3"
|
||||
|
|
|
@ -110,7 +110,7 @@
|
|||
{% for url in urls %}
|
||||
{# URL Display #}
|
||||
<li class="list-group-item">
|
||||
<div class="h3" title={{ url['url_object'].name }}>
|
||||
<div class="h3" title="{{ url['url_object'].name }}">
|
||||
{# HTTPs or not #}
|
||||
{% if url['encrypted'] %}
|
||||
<img src="/static/secure.svg" title="Encrypted request" width="21" height="21"/>
|
||||
|
@ -162,7 +162,11 @@
|
|||
{# Details of the response #}
|
||||
<p class="h4">Response
|
||||
<small>(Status code:
|
||||
<span title="{{ http_status_description(url['url_object'].response['status']) }}">{{ url['url_object'].response['status'] }})</span>
|
||||
<span title="{{ http_status_description(url['url_object'].response['status']) }}">
|
||||
{{ url['url_object'].response['status'] }})
|
||||
</span>
|
||||
-
|
||||
<span>Load time: {{ url['url_object'].time.total_seconds() }}s</span>
|
||||
</small>
|
||||
</p>
|
||||
{{ popup_icons(keys_response, url['url_object'], tree_uuid) }}
|
||||
|
|
|
@ -28,5 +28,11 @@
|
|||
|
||||
<dt class="col-sm-2">Total Nodes</dt>
|
||||
<dd class="col-sm-10">{{ stats['total_hostnames'] }}</dd>
|
||||
|
||||
<dt class="col-sm-2">Total load time</dt>
|
||||
<dd class="col-sm-10">{{ stats['total_load_time'] }}</dd>
|
||||
|
||||
<dt class="col-sm-2">Total size responses</dt>
|
||||
<dd class="col-sm-10">{{ sizeof_fmt(stats['total_size_responses']) }}</dd>
|
||||
</dl>
|
||||
</div>
|
||||
|
|
Loading…
Reference in New Issue