mirror of https://github.com/CIRCL/lookyloo
chg: Bump deps
parent 6d27b861b3
commit ae9cb3e81c
@@ -8,9 +8,9 @@ from datetime import date, timedelta
 from typing import Any, Dict
 
 from redis import Redis
-from werkzeug.useragents import UserAgent
 
 from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
+from lookyloo.helpers import ParsedUserAgent
 
 logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                     level=logging.INFO)

@@ -47,7 +47,7 @@ class Processing(AbstractManager):
         to_store: Dict[str, Any] = {'by_frequency': []}
         uas = Counter([entry.split('|', 1)[1] for entry in entries])
         for ua, _ in uas.most_common():
-            parsed_ua = UserAgent(ua)
+            parsed_ua = ParsedUserAgent(ua)
             if not parsed_ua.platform or not parsed_ua.browser:
                 continue
             if parsed_ua.platform not in to_store:
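Note: in context, this loop tallies the user agents seen for a given day and keeps only those that ua-parser can attribute to a platform and browser. A condensed sketch of the aggregation around this hunk follows; the '<prefix>|<user-agent>' entry format and the storage shape past the last context line are assumptions, not part of the diff.

# Condensed sketch of the aggregation around the hunk above.
# The entry format and the nested dict shape are assumptions, not from the diff.
from collections import Counter
from typing import Any, Dict, List

from lookyloo.helpers import ParsedUserAgent

def group_user_agents(entries: List[str]) -> Dict[str, Any]:
    to_store: Dict[str, Any] = {'by_frequency': []}
    # keep only the user-agent part of each '<prefix>|<user-agent>' entry
    uas = Counter([entry.split('|', 1)[1] for entry in entries])
    for ua, _ in uas.most_common():
        parsed_ua = ParsedUserAgent(ua)
        if not parsed_ua.platform or not parsed_ua.browser:
            continue  # skip anything ua-parser cannot classify
        to_store.setdefault(parsed_ua.platform, {}).setdefault(parsed_ua.browser, []).append(ua)
    return to_store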
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 import json
 import logging

@@ -352,7 +351,7 @@ class CapturesIndex(Mapping):
                    host_cnames[str(answer.name).rstrip('.')] = None
 
                if answer.rdtype in [dns.rdatatype.RdataType.A, dns.rdatatype.RdataType.AAAA]:
-                    host_ips[str(answer.name).rstrip('.')] = list(set(str(b) for b in answer))
+                    host_ips[str(answer.name).rstrip('.')] = list({str(b) for b in answer})
        except Exception:
            host_cnames[node.name] = None
            host_ips[node.name] = []
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 import json
 import logging
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 from .default import LookylooException
 
@@ -1,9 +1,10 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 import hashlib
 import json
 import logging
 import os
+import pkg_resources
 
 from datetime import datetime, timedelta
 from enum import IntEnum, unique
 from functools import lru_cache

@@ -12,12 +13,15 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple, Union
 from urllib.parse import urljoin, urlparse
 
-import pkg_resources
 import requests
 
 from har2tree import CrawledTree, HostNode, URLNode
 from publicsuffix2 import PublicSuffixList, fetch  # type: ignore
 from pytaxonomies import Taxonomies
 from requests.exceptions import HTTPError
+from ua_parser import user_agent_parser  # type: ignore
+from werkzeug.user_agent import UserAgent
+from werkzeug.utils import cached_property
 
 from .default import get_homedir, safe_create_dir, get_config

@@ -184,3 +188,28 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] =
     if namespace:
         root = root / namespace
     return root / digest[0] / digest[1] / digest[2] / digest
+
+
+class ParsedUserAgent(UserAgent):
+
+    # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/
+
+    @cached_property
+    def _details(self):
+        return user_agent_parser.Parse(self.string)
+
+    @property
+    def platform(self):
+        return self._details['os'].get('family')
+
+    @property
+    def browser(self):
+        return self._details['user_agent'].get('family')
+
+    @property
+    def version(self):
+        return '.'.join(
+            part
+            for key in ('major', 'minor', 'patch')
+            if (part := self._details['user_agent'][key]) is not None
+        )
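Note: ParsedUserAgent keeps werkzeug's UserAgent interface but delegates the actual parsing to ua-parser. A minimal usage sketch, with an arbitrary example user-agent string (not taken from the commit):

from lookyloo.helpers import ParsedUserAgent

ua = ParsedUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                     'AppleWebKit/537.36 (KHTML, like Gecko) '
                     'Chrome/98.0.4758.102 Safari/537.36')
print(ua.string)    # raw user-agent string, inherited from werkzeug's UserAgent
print(ua.platform)  # ua-parser OS family, e.g. 'Windows'
print(ua.browser)   # ua-parser browser family, e.g. 'Chrome'
print(ua.version)   # '.'-joined major.minor.patch, e.g. '98.0.4758'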
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 import hashlib
 import logging
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 import base64
 import json

@@ -22,7 +21,6 @@ from PIL import Image  # type: ignore
 from pymisp import MISPAttribute, MISPEvent, MISPObject
 from redis import ConnectionPool, Redis
 from redis.connection import UnixDomainSocketConnection
-from werkzeug.useragents import UserAgent
 
 from .capturecache import CaptureCache, CapturesIndex
 from .context import Context

@@ -30,7 +28,8 @@ from .default import LookylooException, get_homedir, get_config, get_socket_path
 from .exceptions import (MissingCaptureDirectory,
                          MissingUUID, TreeNeedsRebuild, NoValidHarFile)
 from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
-                      get_resources_hashes, get_splash_url, get_taxonomies, uniq_domains)
+                      get_resources_hashes, get_splash_url, get_taxonomies,
+                      uniq_domains, ParsedUserAgent)
 from .indexing import Indexing
 from .modules import (MISP, PhishingInitiative, UniversalWhois,
                       UrlScan, VirusTotal, Phishtank, Hashlookup)

@@ -153,7 +152,7 @@ class Lookyloo():
 
         meta = {}
         ct = self.get_crawled_tree(capture_uuid)
-        ua = UserAgent(ct.root_hartree.user_agent)
+        ua = ParsedUserAgent(ct.root_hartree.user_agent)
         meta['user_agent'] = ua.string
         if ua.platform:
             meta['os'] = ua.platform

@@ -162,8 +161,6 @@ class Lookyloo():
             meta['browser'] = f'{ua.browser} {ua.version}'
         else:
             meta['browser'] = ua.browser
-        if ua.language:
-            meta['language'] = ua.language
 
         if not meta:
             # UA not recognized
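Note: with ParsedUserAgent the browser and OS metadata still come from the User-Agent string, but werkzeug's old `language` attribute has no ua-parser equivalent, so the `language` key is dropped. A simplified sketch of the resulting metadata assembly; the conditionals around the context lines are paraphrased, not quoted from the file:

# Simplified sketch of the capture metadata after this change; the exact
# surrounding conditionals are assumptions.
meta = {}
ua = ParsedUserAgent(ct.root_hartree.user_agent)
meta['user_agent'] = ua.string
if ua.platform:
    meta['os'] = ua.platform
if ua.browser:
    if ua.version:
        meta['browser'] = f'{ua.browser} {ua.version}'
    else:
        meta['browser'] = ua.browser
# meta['language'] is no longer set: ua-parser does not expose a language.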
@@ -193,7 +190,7 @@ class Lookyloo():
         # get existing categories if possible
         if categ_file.exists():
             with categ_file.open() as f:
-                current_categories = set(line.strip() for line in f.readlines())
+                current_categories = {line.strip() for line in f.readlines()}
         else:
             current_categories = set()
         current_categories.add(category)

@@ -208,7 +205,7 @@ class Lookyloo():
         # get existing categories if possible
         if categ_file.exists():
             with categ_file.open() as f:
-                current_categories = set(line.strip() for line in f.readlines())
+                current_categories = {line.strip() for line in f.readlines()}
         else:
             current_categories = set()
         current_categories.remove(category)

@@ -731,7 +728,7 @@ class Lookyloo():
         ct = self.get_crawled_tree(tree_uuid)
         hashes = ct.root_hartree.build_all_hashes(algorithm)
         if urls_only:
-            return {h: set(node.name for node in nodes) for h, nodes in hashes.items()}
+            return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
         return hashes
 
     def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:
@@ -1392,6 +1392,14 @@ category = "main"
 optional = false
 python-versions = ">=3.6"
 
+[[package]]
+name = "ua-parser"
+version = "0.10.0"
+description = "Python port of Browserscope's user agent parser"
+category = "main"
+optional = false
+python-versions = "*"
+
 [[package]]
 name = "urllib3"
 version = "1.26.9"

@@ -1514,7 +1522,7 @@ misp = ["python-magic", "pydeep"]
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8,<3.11"
-content-hash = "2ccde1664f74c0986cfaf8449dd2e76cbc19100f9d43ddb168bd9ad10756ddbc"
+content-hash = "ae88c727752a91752586821db041fcb5de7177df6ff8d7fb448de7b2c05af0e1"
 
 [metadata.files]
 aiohttp = [

@@ -2537,6 +2545,10 @@ typing-extensions = [
     {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
     {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
 ]
+ua-parser = [
+    {file = "ua-parser-0.10.0.tar.gz", hash = "sha256:47b1782ed130d890018d983fac37c2a80799d9e0b9c532e734c67cf70f185033"},
+    {file = "ua_parser-0.10.0-py2.py3-none-any.whl", hash = "sha256:46ab2e383c01dbd2ab284991b87d624a26a08f72da4d7d413f5bfab8b9036f8a"},
+]
 urllib3 = [
     {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
     {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
@@ -65,6 +65,7 @@ chardet = "^4.0.0"
 Flask-Cors = "^3.0.10"
 pyhashlookup = "^1.1.1"
 lief = "^0.12.0"
+ua-parser = "^0.10.0"
 
 [tool.poetry.extras]
 misp = ['python-magic', 'pydeep']