chg: Bump deps

pull/382/head
Raphaël Vinot 2022-03-29 21:13:02 +02:00
parent 6d27b861b3
commit ae9cb3e81c
9 changed files with 54 additions and 19 deletions

View File

@ -8,9 +8,9 @@ from datetime import date, timedelta
from typing import Any, Dict
from redis import Redis
from werkzeug.useragents import UserAgent
from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
from lookyloo.helpers import ParsedUserAgent
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)
@ -47,7 +47,7 @@ class Processing(AbstractManager):
to_store: Dict[str, Any] = {'by_frequency': []}
uas = Counter([entry.split('|', 1)[1] for entry in entries])
for ua, _ in uas.most_common():
parsed_ua = UserAgent(ua)
parsed_ua = ParsedUserAgent(ua)
if not parsed_ua.platform or not parsed_ua.browser:
continue
if parsed_ua.platform not in to_store:

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import logging
@ -352,7 +351,7 @@ class CapturesIndex(Mapping):
host_cnames[str(answer.name).rstrip('.')] = None
if answer.rdtype in [dns.rdatatype.RdataType.A, dns.rdatatype.RdataType.AAAA]:
host_ips[str(answer.name).rstrip('.')] = list(set(str(b) for b in answer))
host_ips[str(answer.name).rstrip('.')] = list({str(b) for b in answer})
except Exception:
host_cnames[node.name] = None
host_ips[node.name] = []

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import logging

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from .default import LookylooException

View File

@ -1,9 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import json
import logging
import os
import pkg_resources
from datetime import datetime, timedelta
from enum import IntEnum, unique
from functools import lru_cache
@ -12,12 +13,15 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from urllib.parse import urljoin, urlparse
import pkg_resources
import requests
from har2tree import CrawledTree, HostNode, URLNode
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
from pytaxonomies import Taxonomies
from requests.exceptions import HTTPError
from ua_parser import user_agent_parser # type: ignore
from werkzeug.user_agent import UserAgent
from werkzeug.utils import cached_property
from .default import get_homedir, safe_create_dir, get_config
@ -184,3 +188,28 @@ def get_cache_directory(root: Path, identifier: str, namespace: Optional[str] =
if namespace:
root = root / namespace
return root / digest[0] / digest[1] / digest[2] / digest
class ParsedUserAgent(UserAgent):
    """User agent whose platform, browser and version are resolved by ua_parser.

    The base ``UserAgent`` only carries the raw header in ``self.string``;
    this subclass overrides ``platform``, ``browser`` and ``version`` with
    values taken from ``user_agent_parser.Parse``.
    """

    # from https://python.tutorialink.com/how-do-i-get-the-user-agent-with-flask/

    # Placeholder family ua_parser reports when none of its patterns match.
    _UNKNOWN_FAMILY = 'Other'

    @cached_property
    def _details(self) -> Dict[str, Any]:
        """Return the ua_parser result for ``self.string`` (parsed once, then cached)."""
        return user_agent_parser.Parse(self.string)

    @classmethod
    def _known_family(cls, family: Optional[str]) -> Optional[str]:
        """Return *family*, or ``None`` when it is missing or the 'Other' placeholder."""
        # Callers test ``if ua.platform`` / ``if not ua.browser`` to detect an
        # unrecognised user agent; the truthy 'Other' placeholder would defeat
        # those checks, so it is normalised to None here.
        if not family or family == cls._UNKNOWN_FAMILY:
            return None
        return family

    @property
    def platform(self) -> Optional[str]:
        """Operating system family, or ``None`` if it was not recognised."""
        return self._known_family(self._details['os'].get('family'))

    @property
    def browser(self) -> Optional[str]:
        """Browser family, or ``None`` if it was not recognised."""
        return self._known_family(self._details['user_agent'].get('family'))

    @property
    def version(self) -> str:
        """Dotted browser version built from the major/minor/patch parts.

        Parts that are ``None`` are skipped; the result is an empty string
        when no part is available.
        """
        return '.'.join(
            part
            for key in ('major', 'minor', 'patch')
            if (part := self._details['user_agent'][key]) is not None
        )

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import logging

View File

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import base64
import json
@ -22,7 +21,6 @@ from PIL import Image # type: ignore
from pymisp import MISPAttribute, MISPEvent, MISPObject
from redis import ConnectionPool, Redis
from redis.connection import UnixDomainSocketConnection
from werkzeug.useragents import UserAgent
from .capturecache import CaptureCache, CapturesIndex
from .context import Context
@ -30,7 +28,8 @@ from .default import LookylooException, get_homedir, get_config, get_socket_path
from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
get_resources_hashes, get_splash_url, get_taxonomies, uniq_domains)
get_resources_hashes, get_splash_url, get_taxonomies,
uniq_domains, ParsedUserAgent)
from .indexing import Indexing
from .modules import (MISP, PhishingInitiative, UniversalWhois,
UrlScan, VirusTotal, Phishtank, Hashlookup)
@ -153,7 +152,7 @@ class Lookyloo():
meta = {}
ct = self.get_crawled_tree(capture_uuid)
ua = UserAgent(ct.root_hartree.user_agent)
ua = ParsedUserAgent(ct.root_hartree.user_agent)
meta['user_agent'] = ua.string
if ua.platform:
meta['os'] = ua.platform
@ -162,8 +161,6 @@ class Lookyloo():
meta['browser'] = f'{ua.browser} {ua.version}'
else:
meta['browser'] = ua.browser
if ua.language:
meta['language'] = ua.language
if not meta:
# UA not recognized
@ -193,7 +190,7 @@ class Lookyloo():
# get existing categories if possible
if categ_file.exists():
with categ_file.open() as f:
current_categories = set(line.strip() for line in f.readlines())
current_categories = {line.strip() for line in f.readlines()}
else:
current_categories = set()
current_categories.add(category)
@ -208,7 +205,7 @@ class Lookyloo():
# get existing categories if possible
if categ_file.exists():
with categ_file.open() as f:
current_categories = set(line.strip() for line in f.readlines())
current_categories = {line.strip() for line in f.readlines()}
else:
current_categories = set()
current_categories.remove(category)
@ -731,7 +728,7 @@ class Lookyloo():
ct = self.get_crawled_tree(tree_uuid)
hashes = ct.root_hartree.build_all_hashes(algorithm)
if urls_only:
return {h: set(node.name for node in nodes) for h, nodes in hashes.items()}
return {h: {node.name for node in nodes} for h, nodes in hashes.items()}
return hashes
def merge_hashlookup_tree(self, tree_uuid: str, /) -> Tuple[Dict[str, Dict[str, Any]], int]:

14
poetry.lock generated
View File

@ -1392,6 +1392,14 @@ category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "ua-parser"
version = "0.10.0"
description = "Python port of Browserscope's user agent parser"
category = "main"
optional = false
python-versions = "*"
[[package]]
name = "urllib3"
version = "1.26.9"
@ -1514,7 +1522,7 @@ misp = ["python-magic", "pydeep"]
[metadata]
lock-version = "1.1"
python-versions = ">=3.8,<3.11"
content-hash = "2ccde1664f74c0986cfaf8449dd2e76cbc19100f9d43ddb168bd9ad10756ddbc"
content-hash = "ae88c727752a91752586821db041fcb5de7177df6ff8d7fb448de7b2c05af0e1"
[metadata.files]
aiohttp = [
@ -2537,6 +2545,10 @@ typing-extensions = [
{file = "typing_extensions-4.1.1-py3-none-any.whl", hash = "sha256:21c85e0fe4b9a155d0799430b0ad741cdce7e359660ccbd8b530613e8df88ce2"},
{file = "typing_extensions-4.1.1.tar.gz", hash = "sha256:1a9462dcc3347a79b1f1c0271fbe79e844580bb598bafa1ed208b94da3cdcd42"},
]
ua-parser = [
{file = "ua-parser-0.10.0.tar.gz", hash = "sha256:47b1782ed130d890018d983fac37c2a80799d9e0b9c532e734c67cf70f185033"},
{file = "ua_parser-0.10.0-py2.py3-none-any.whl", hash = "sha256:46ab2e383c01dbd2ab284991b87d624a26a08f72da4d7d413f5bfab8b9036f8a"},
]
urllib3 = [
{file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"},
{file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"},

View File

@ -65,6 +65,7 @@ chardet = "^4.0.0"
Flask-Cors = "^3.0.10"
pyhashlookup = "^1.1.1"
lief = "^0.12.0"
ua-parser = "^0.10.0"
[tool.poetry.extras]
misp = ['python-magic', 'pydeep']