chg: Bump deps

pull/382/head
Raphaël Vinot 2022-03-29 21:13:02 +02:00
parent 6d27b861b3
commit ae9cb3e81c
9 changed files with 54 additions and 19 deletions

View File

@ -8,9 +8,9 @@ from datetime import date, timedelta
from typing import Any, Dict from typing import Any, Dict
from redis import Redis from redis import Redis
from werkzeug.useragents import UserAgent
from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
from lookyloo.helpers import ParsedUserAgent
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO) level=logging.INFO)
@ -47,7 +47,7 @@ class Processing(AbstractManager):
to_store: Dict[str, Any] = {'by_frequency': []} to_store: Dict[str, Any] = {'by_frequency': []}
uas = Counter([entry.split('|', 1)[1] for entry in entries]) uas = Counter([entry.split('|', 1)[1] for entry in entries])
for ua, _ in uas.most_common(): for ua, _ in uas.most_common():
parsed_ua = UserAgent(ua) parsed_ua = ParsedUserAgent(ua)
if not parsed_ua.platform or not parsed_ua.browser: if not parsed_ua.platform or not parsed_ua.browser:
continue continue
if parsed_ua.platform not in to_store: if parsed_ua.platform not in to_store:

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json import json
import logging import logging
@ -352,7 +351,7 @@ class CapturesIndex(Mapping):
host_cnames[str(answer.name).rstrip('.')] = None host_cnames[str(answer.name).rstrip('.')] = None
if answer.rdtype in [dns.rdatatype.RdataType.A, dns.rdatatype.RdataType.AAAA]: if answer.rdtype in [dns.rdatatype.RdataType.A, dns.rdatatype.RdataType.AAAA]:
host_ips[str(answer.name).rstrip('.')] = list(set(str(b) for b in answer)) host_ips[str(answer.name).rstrip('.')] = list({str(b) for b in answer})
except Exception: except Exception:
host_cnames[node.name] = None host_cnames[node.name] = None
host_ips[node.name] = [] host_ips[node.name] = []

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json import json
import logging import logging

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
from .default import LookylooException from .default import LookylooException

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib import hashlib
import json import json
import logging import logging
import os import os
import pkg_resources
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import IntEnum, unique from enum import IntEnum, unique
from functools import lru_cache from functools import lru_cache
@ -12,12 +13,15 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union from typing import Any, Dict, List, Optional, Set, Tuple, Union
from urllib.parse import urljoin, urlparse from urllib.parse import urljoin, urlparse
import pkg_resources
import requests import requests
from har2tree import CrawledTree, HostNode, URLNode from har2tree import CrawledTree, HostNode, URLNode
from publicsuffix2 import PublicSuffixList, fetch # type: ignore from publicsuffix2 import PublicSuffixList, fetch # type: ignore
from pytaxonomies import Taxonomies from pytaxonomies import Taxonomies
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from ua_parser import user_agent_parser # type: ignore
from werkzeug.user_agent import UserAgent
from werkzeug.utils import cached_property
from .default import get_homedir, safe_create_dir, get_config from .default import get_homedir, safe_create_dir, get_config
@ -184,3 +188,28 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] =
if namespace: if namespace:
root = root / namespace root = root / namespace
return root / digest[0] / digest[1] / digest[2] / digest return root / digest[0] / digest[1] / digest[2] / digest
class ParsedUserAgent(UserAgent):
    """User agent whose fields are derived with ``ua_parser``.

    Exposes ``platform``, ``browser`` and ``version``, all read from the
    result of ``user_agent_parser.Parse(self.string)``.
    """

    # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/

    @cached_property
    def _details(self):
        """Return the ``ua_parser`` parse result for ``self.string``."""
        return user_agent_parser.Parse(self.string)

    @property
    def platform(self):
        """Return the ``family`` entry of the parsed ``os`` section, or None if absent."""
        os_details = self._details['os']
        return os_details.get('family')

    @property
    def browser(self):
        """Return the ``family`` entry of the parsed ``user_agent`` section, or None if absent."""
        ua_details = self._details['user_agent']
        return ua_details.get('family')

    @property
    def version(self):
        """Return the browser version as a dot-separated string.

        Joins the ``major``, ``minor`` and ``patch`` entries of the parsed
        ``user_agent`` section, leaving out any that are None.
        """
        ua_details = self._details['user_agent']
        components = [ua_details[key] for key in ('major', 'minor', 'patch')]
        return '.'.join(piece for piece in components if piece is not None)

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib import hashlib
import logging import logging

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
import base64 import base64
import json import json
@ -22,7 +21,6 @@ from PIL import Image # type: ignore
from pymisp import MISPAttribute, MISPEvent, MISPObject from pymisp import MISPAttribute, MISPEvent, MISPObject
from redis import ConnectionPool, Redis from redis import ConnectionPool, Redis
from redis.connection import UnixDomainSocketConnection from redis.connection import UnixDomainSocketConnection
from werkzeug.useragents import UserAgent
from .capturecache import CaptureCache, CapturesIndex from .capturecache import CaptureCache, CapturesIndex
from .context import Context from .context import Context
@ -30,7 +28,8 @@ from .default import LookylooException, get_homedir, get_config, get_socket_path
from .exceptions import (MissingCaptureDirectory, from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile) MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_email_template, from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
get_resources_hashes, get_splash_url, get_taxonomies, uniq_domains) get_resources_hashes, get_splash_url, get_taxonomies,
uniq_domains, ParsedUserAgent)
from .indexing import Indexing from .indexing import Indexing
from .modules import (MISP, PhishingInitiative, UniversalWhois, from .modules import (MISP, PhishingInitiative, UniversalWhois,
UrlScan, VirusTotal, Phishtank, Hashlookup) UrlScan, VirusTotal, Phishtank, Hashlookup)
@ -153,7 +152,7 @@ class Lookyloo():
meta = {} meta = {}
ct = self.get_crawled_tree(capture_uuid) ct = self.get_crawled_tree(capture_uuid)
ua = UserAgent(ct.root_hartree.user_agent) ua = ParsedUserAgent(ct.root_hartree.user_agent)
meta['user_agent'] = ua.string meta['user_agent'] = ua.string
if ua.platform: if ua.platform:
meta['os'] = ua.platform meta['os'] = ua.platform
@ -162,8 +161,6 @@ class Lookyloo():
meta['browser'] = f'{ua.browser} {ua.version}' meta['browser'] = f'{ua.browser} {ua.version}'
else: else:
meta['browser'] = ua.browser meta['browser'] = ua.browser
if ua.language:
meta['language'] = ua.language
if not meta: if not meta:
# UA not recognized # UA not recognized
@ -193,7 +190,7 @@ class Lookyloo():
# get existing categories if possible # get existing categories if possible
if categ_file.exists(): if categ_file.exists():
with categ_file.open() as f: with categ_file.open() as f:
current_categories = set(line.strip() for line in f.readlines()) current_categories = {line.strip() for line in f.readlines()}
else: else:
current_categories = set() current_categories = set()
current_categories.add(category) current_categories.add(category)
@ -208,7 +205,7 @@ class Lookyloo():
# get existing categories if possible # get existing categories if possible
if categ_file.exists(): if categ_file.exists():
with categ_file.open() as f: with categ_file.open() as f:
current_categories = set(line.strip() for line in f.readlines()) current_categories = {line.strip() for line in f.readlines()}
else: else:
current_categories = set() current_categories = set()
current_categories.remove(category) current_categories.remove(category)
@ -731,7 +728,7 @@ class Lookyloo():
ct = self.get_crawled_tree(tree_uuid) ct = self.get_crawled_tree(tree_uuid)
hashes = ct.root_hartree.build_all_hashes(algorithm) hashes = ct.root_hartree.build_all_hashes(algorithm)
if urls_only: if urls_only:
return {h: set(node.name for node in nodes) for h, nodes in hashes.items()} return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
return hashes return hashes
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]: def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:

14
poetry.lock generated
View File

@ -1392,6 +1392,14 @@ category = "main"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
[[package]]
name = "ua-parser"
version = "0.10.0"
description = "Python port of Browserscope's user agent parser"
category = "main"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "urllib3" name = "urllib3"
version = "1.26.9" version = "1.26.9"
@ -1514,7 +1522,7 @@ misp = ["python-magic", "pydeep"]
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = ">=3.8,<3.11" python-versions = ">=3.8,<3.11"
content-hash = "2ccde1664f74c0986cfaf8449dd2e76cbc19100f9d43ddb168bd9ad10756ddbc" content-hash = "ae88c727752a91752586821db041fcb5de7177df6ff8d7fb448de7b2c05af0e1"
[metadata.files] [metadata.files]
aiohttp = [ aiohttp = [
@ -2537,6 +2545,10 @@ typing-extensions = [
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"}, {file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"}, {file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
] ]
ua-parser = [
{file = "ua-parser-0.10.0.tar.gz", hash = "sha256:47b1782ed130d890018d983fac37c2a80799d9e0b9c532e734c67cf70f185033"},
{file = "ua_parser-0.10.0-py2.py3-none-any.whl", hash = "sha256:46ab2e383c01dbd2ab284991b87d624a26a08f72da4d7d413f5bfab8b9036f8a"},
]
urllib3 = [ urllib3 = [
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},

View File

@ -65,6 +65,7 @@ chardet = "^4.0.0"
Flask-Cors = "^3.0.10" Flask-Cors = "^3.0.10"
pyhashlookup = "^1.1.1" pyhashlookup = "^1.1.1"
lief = "^0.12.0" lief = "^0.12.0"
ua-parser = "^0.10.0"
[tool.poetry.extras] [tool.poetry.extras]
misp = ['python-magic', 'pydeep'] misp = ['python-magic', 'pydeep']