mirror of https://github.com/CIRCL/lookyloo
chg: Bump deps
parent
6d27b861b3
commit
ae9cb3e81c
|
@ -8,9 +8,9 @@ from datetime import date, timedelta
|
|||
from typing import Any, Dict
|
||||
|
||||
from redis import Redis
|
||||
from werkzeug.useragents import UserAgent
|
||||
|
||||
from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
|
||||
from lookyloo.helpers import ParsedUserAgent
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO)
|
||||
|
@ -47,7 +47,7 @@ class Processing(AbstractManager):
|
|||
to_store: Dict[str, Any] = {'by_frequency': []}
|
||||
uas = Counter([entry.split('|', 1)[1] for entry in entries])
|
||||
for ua, _ in uas.most_common():
|
||||
parsed_ua = UserAgent(ua)
|
||||
parsed_ua = ParsedUserAgent(ua)
|
||||
if not parsed_ua.platform or not parsed_ua.browser:
|
||||
continue
|
||||
if parsed_ua.platform not in to_store:
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
@ -352,7 +351,7 @@ class CapturesIndex(Mapping):
|
|||
host_cnames[str(answer.name).rstrip('.')] = None
|
||||
|
||||
if answer.rdtype in [dns.rdatatype.RdataType.A, dns.rdatatype.RdataType.AAAA]:
|
||||
host_ips[str(answer.name).rstrip('.')] = list(set(str(b) for b in answer))
|
||||
host_ips[str(answer.name).rstrip('.')] = list({str(b) for b in answer})
|
||||
except Exception:
|
||||
host_cnames[node.name] = None
|
||||
host_ips[node.name] = []
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from .default import LookylooException
|
||||
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pkg_resources
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from enum import IntEnum, unique
|
||||
from functools import lru_cache
|
||||
|
@ -12,12 +13,15 @@ from pathlib import Path
|
|||
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import pkg_resources
|
||||
import requests
|
||||
|
||||
from har2tree import CrawledTree, HostNode, URLNode
|
||||
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
|
||||
from pytaxonomies import Taxonomies
|
||||
from requests.exceptions import HTTPError
|
||||
from ua_parser import user_agent_parser # type: ignore
|
||||
from werkzeug.user_agent import UserAgent
|
||||
from werkzeug.utils import cached_property
|
||||
|
||||
from .default import get_homedir, safe_create_dir, get_config
|
||||
|
||||
|
@ -184,3 +188,28 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] =
|
|||
if namespace:
|
||||
root = root / namespace
|
||||
return root / digest[0] / digest[1] / digest[2] / digest
|
||||
|
||||
|
||||
class ParsedUserAgent(UserAgent):
    """``UserAgent`` subclass whose fields are filled in by ``ua_parser``.

    The raw user agent string (``self.string``) is handed to
    ``user_agent_parser.Parse`` and the ``platform``, ``browser`` and
    ``version`` properties are read from the resulting dictionary.

    NOTE(review): callers test ``platform`` / ``browser`` for truthiness;
    confirm what ``ua_parser`` reports for unrecognised agents.
    """

    # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/

    @cached_property
    def _details(self):
        """Parsed representation of ``self.string`` (wrapped in ``cached_property``)."""
        return user_agent_parser.Parse(self.string)

    @property
    def platform(self):
        """The ``family`` entry of the parsed ``os`` section, or None if absent."""
        os_section = self._details['os']
        return os_section.get('family')

    @property
    def browser(self):
        """The ``family`` entry of the parsed ``user_agent`` section, or None if absent."""
        ua_section = self._details['user_agent']
        return ua_section.get('family')

    @property
    def version(self):
        """Dot-joined ``major``/``minor``/``patch`` values, skipping those that are None."""
        components = []
        for key in ('major', 'minor', 'patch'):
            value = self._details['user_agent'][key]
            if value is not None:
                components.append(value)
        return '.'.join(components)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import base64
|
||||
import json
|
||||
|
@ -22,7 +21,6 @@ from PIL import Image # type: ignore
|
|||
from pymisp import MISPAttribute, MISPEvent, MISPObject
|
||||
from redis import ConnectionPool, Redis
|
||||
from redis.connection import UnixDomainSocketConnection
|
||||
from werkzeug.useragents import UserAgent
|
||||
|
||||
from .capturecache import CaptureCache, CapturesIndex
|
||||
from .context import Context
|
||||
|
@ -30,7 +28,8 @@ from .default import LookylooException, get_homedir, get_config, get_socket_path
|
|||
from .exceptions import (MissingCaptureDirectory,
|
||||
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
|
||||
from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
|
||||
get_resources_hashes, get_splash_url, get_taxonomies, uniq_domains)
|
||||
get_resources_hashes, get_splash_url, get_taxonomies,
|
||||
uniq_domains, ParsedUserAgent)
|
||||
from .indexing import Indexing
|
||||
from .modules import (MISP, PhishingInitiative, UniversalWhois,
|
||||
UrlScan, VirusTotal, Phishtank, Hashlookup)
|
||||
|
@ -153,7 +152,7 @@ class Lookyloo():
|
|||
|
||||
meta = {}
|
||||
ct = self.get_crawled_tree(capture_uuid)
|
||||
ua = UserAgent(ct.root_hartree.user_agent)
|
||||
ua = ParsedUserAgent(ct.root_hartree.user_agent)
|
||||
meta['user_agent'] = ua.string
|
||||
if ua.platform:
|
||||
meta['os'] = ua.platform
|
||||
|
@ -162,8 +161,6 @@ class Lookyloo():
|
|||
meta['browser'] = f'{ua.browser} {ua.version}'
|
||||
else:
|
||||
meta['browser'] = ua.browser
|
||||
if ua.language:
|
||||
meta['language'] = ua.language
|
||||
|
||||
if not meta:
|
||||
# UA not recognized
|
||||
|
@ -193,7 +190,7 @@ class Lookyloo():
|
|||
# get existing categories if possible
|
||||
if categ_file.exists():
|
||||
with categ_file.open() as f:
|
||||
current_categories = set(line.strip() for line in f.readlines())
|
||||
current_categories = {line.strip() for line in f.readlines()}
|
||||
else:
|
||||
current_categories = set()
|
||||
current_categories.add(category)
|
||||
|
@ -208,7 +205,7 @@ class Lookyloo():
|
|||
# get existing categories if possible
|
||||
if categ_file.exists():
|
||||
with categ_file.open() as f:
|
||||
current_categories = set(line.strip() for line in f.readlines())
|
||||
current_categories = {line.strip() for line in f.readlines()}
|
||||
else:
|
||||
current_categories = set()
|
||||
current_categories.remove(category)
|
||||
|
@ -731,7 +728,7 @@ class Lookyloo():
|
|||
ct = self.get_crawled_tree(tree_uuid)
|
||||
hashes = ct.root_hartree.build_all_hashes(algorithm)
|
||||
if urls_only:
|
||||
return {h: set(node.name for node in nodes) for h, nodes in hashes.items()}
|
||||
return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
|
||||
return hashes
|
||||
|
||||
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:
|
||||
|
|
|
@ -1392,6 +1392,14 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[[package]]
|
||||
name = "ua-parser"
|
||||
version = "0.10.0"
|
||||
description = "Python port of Browserscope's user agent parser"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "1.26.9"
|
||||
|
@ -1514,7 +1522,7 @@ misp = ["python-magic", "pydeep"]
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = ">=3.8,<3.11"
|
||||
content-hash = "2ccde1664f74c0986cfaf8449dd2e76cbc19100f9d43ddb168bd9ad10756ddbc"
|
||||
content-hash = "ae88c727752a91752586821db041fcb5de7177df6ff8d7fb448de7b2c05af0e1"
|
||||
|
||||
[metadata.files]
|
||||
aiohttp = [
|
||||
|
@ -2537,6 +2545,10 @@ typing-extensions = [
|
|||
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
|
||||
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
|
||||
]
|
||||
ua-parser = [
|
||||
{file = "ua-parser-0.10.0.tar.gz", hash = "sha256:47b1782ed130d890018d983fac37c2a80799d9e0b9c532e734c67cf70f185033"},
|
||||
{file = "ua_parser-0.10.0-py2.py3-none-any.whl", hash = "sha256:46ab2e383c01dbd2ab284991b87d624a26a08f72da4d7d413f5bfab8b9036f8a"},
|
||||
]
|
||||
urllib3 = [
|
||||
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
|
||||
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},
|
||||
|
|
|
@ -65,6 +65,7 @@ chardet = "^4.0.0"
|
|||
Flask-Cors = "^3.0.10"
|
||||
pyhashlookup = "^1.1.1"
|
||||
lief = "^0.12.0"
|
||||
ua-parser = "^0.10.0"
|
||||
|
||||
[tool.poetry.extras]
|
||||
misp = ['python-magic', 'pydeep']
|
||||
|
|
Loading…
Reference in New Issue