chg: use template

pull/280/head
Raphaël Vinot 2021-10-18 13:06:43 +02:00
parent dc5aba5999
commit 1f998b457f
36 changed files with 192 additions and 157 deletions

View File

@ -10,9 +10,8 @@ from typing import Dict, List
from redis import Redis
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import (get_captures_dir, get_config, get_homedir,
get_socket_path)
from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path
from lookyloo.helpers import get_captures_dir
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)

View File

@ -16,10 +16,8 @@ from defang import refang # type: ignore
from redis import Redis
from scrapysplashwrapper import crawl
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import (get_captures_dir, get_config, get_socket_path,
get_splash_url, load_cookies, safe_create_dir,
splash_status)
from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
from lookyloo.helpers import get_captures_dir, get_splash_url, load_cookies, splash_status
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)

View File

@ -5,7 +5,7 @@ import logging
import os
from datetime import datetime, timedelta
from lookyloo.abstractmanager import AbstractManager
from lookyloo.default import AbstractManager
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.lookyloo import Lookyloo

View File

@ -10,9 +10,7 @@ from typing import Any, Dict
from redis import Redis
from werkzeug.useragents import UserAgent
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import (get_config, get_homedir, get_socket_path,
safe_create_dir)
from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)

View File

@ -11,7 +11,7 @@ from typing import List, Optional, Union
from redis import Redis
from redis.exceptions import ConnectionError
from lookyloo.helpers import get_homedir, get_socket_path
from lookyloo.default import get_homedir, get_socket_path
def check_running(name: str) -> bool:

View File

@ -3,7 +3,7 @@
import time
from lookyloo.abstractmanager import AbstractManager
from lookyloo.default import AbstractManager
def main():

View File

@ -3,7 +3,7 @@
from subprocess import Popen, run
from lookyloo.helpers import get_config, get_homedir
from lookyloo.default import get_config, get_homedir
def main():

View File

@ -4,8 +4,7 @@
import logging
from subprocess import Popen
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_config, get_homedir
from lookyloo.default import get_config, get_homedir, AbstractManager
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)

View File

@ -5,7 +5,7 @@ from subprocess import Popen
from redis import Redis
from lookyloo.helpers import get_homedir, get_socket_path
from lookyloo.default import get_homedir, get_socket_path
def main():

View File

@ -10,7 +10,7 @@ import subprocess
import sys
from pathlib import Path
from lookyloo.helpers import get_homedir
from lookyloo.default import get_homedir
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)

View File

@ -6,6 +6,7 @@
"public_domain": "lookyloo.myorg.local",
"website_listen_ip": "0.0.0.0",
"website_listen_port": 5100,
"systemd_service_name": "lookyloo",
"splash_url": "http://127.0.0.1:8050",
"default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
"users": {},
@ -52,6 +53,7 @@
"public_domain": "Domain where the instance can be reached. Used for permalinks (e-mail, MISP export).",
"website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.",
"website_listen_port": "Port Flask will listen on.",
"systemd_service_name": "(Optional) Name of the systemd service if your project has one.",
"splash_url": "URL to connect to splash",
"default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, doesn't provide a user agent.",
"users": "It is some kind of an admin accounts. Format: {username: password}",

View File

@ -20,9 +20,8 @@ from redis import Redis
from .context import Context
from .indexing import Indexing
from .exceptions import (LookylooException, MissingCaptureDirectory, NoValidHarFile,
MissingUUID, TreeNeedsRebuild)
from .helpers import try_make_file, get_config
from .default import LookylooException, try_make_file, get_config
from .exceptions import MissingCaptureDirectory, NoValidHarFile, MissingUUID, TreeNeedsRebuild
class CaptureCache():

View File

@ -10,8 +10,8 @@ from urllib.parse import urlsplit
from har2tree import CrawledTree, HostNode, URLNode
from redis import Redis
from .helpers import (get_config, get_homedir, get_resources_hashes,
get_socket_path, load_known_content, serialize_to_json)
from .default import get_config, get_homedir, get_socket_path
from .helpers import get_resources_hashes, load_known_content, serialize_to_json
from .modules import SaneJavaScript

View File

@ -0,0 +1,14 @@
env_global_name: str = 'LOOKYLOO_HOME'
from .exceptions import LookylooException # noqa
# NOTE: the imports below keep import paths short when using these
# classes/methods in the rest of the project, while the code itself stays in a
# subdirectory where it can be updated easily.
# You should not have to change anything in this file below this line.
from .abstractmanager import AbstractManager # noqa
from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noqa
from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa

View File

@ -29,13 +29,20 @@ class AbstractManager(ABC):
@staticmethod
def is_running() -> List[Tuple[str, float]]:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
try:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
except ConnectionError:
print('Unable to connect to redis, the system is down.')
return []
@staticmethod
def force_shutdown():
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
r.set('shutdown', 1)
try:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
r.set('shutdown', 1)
except ConnectionError:
print('Unable to connect to redis, the system is down.')
def set_running(self) -> None:
self.__redis.zincrby('running', 1, self.script_name)

View File

@ -0,0 +1,18 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
class LookylooException(Exception):
    """Base class of the project's exceptions; adds nothing to ``Exception``."""
class MissingEnv(LookylooException):
    """Raised when the home-directory environment variable is not set."""
class CreateDirectoryException(LookylooException):
    """Raised when a directory cannot be created because the path is not a directory."""
class ConfigError(LookylooException):
    """Raised on configuration problems, e.g. a missing or invalid config directory."""

101
lookyloo/default/helpers.py Normal file
View File

@ -0,0 +1,101 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import logging
import os
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Optional, Union
from . import env_global_name
from .exceptions import ConfigError, CreateDirectoryException, MissingEnv
configs: Dict[str, Dict[str, Any]] = {}
logger = logging.getLogger('Helpers')
@lru_cache(64)
def get_homedir() -> Path:
    """Return the project home directory taken from the environment.

    The directory is read from the environment variable named by
    ``env_global_name``. When that variable is unset or empty, a ``.env``
    file located three directory levels above this module is loaded (if it
    exists) and every ``KEY=VALUE`` line in it is exported to ``os.environ``
    before the variable is checked again.

    Returns:
        The value of the environment variable, as a ``Path``.

    Raises:
        MissingEnv: the variable is still unset after trying the ``.env`` file.
    """
    if not os.environ.get(env_global_name):
        # Resolved once: used both to locate the .env file and as the
        # suggested value in the error message.
        guessed_home = Path(__file__).resolve().parent.parent.parent
        env_file = guessed_home / '.env'
        # Try to open a .env file in the home directory if it exists.
        if env_file.exists():
            with env_file.open() as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Blank lines, comments and lines without '=' carry no
                    # assignment; skip them instead of failing to unpack.
                    if not line or line.startswith('#') or '=' not in line:
                        continue
                    key, value = line.split('=', 1)
                    if not key:
                        # An empty variable name cannot be exported.
                        continue
                    # Strip surrounding quotes only when they are a matching
                    # pair, so empty or half-quoted values are left intact.
                    if len(value) >= 2 and value[0] in ('"', "'") and value[-1] == value[0]:
                        value = value[1:-1]
                    os.environ[key] = value

        if not os.environ.get(env_global_name):
            raise MissingEnv(
                f"{env_global_name} is missing. "
                "Run the following command (assuming you run the code from the clonned repository):"
                f"export {env_global_name}='{guessed_home}'"
            )
    return Path(os.environ[env_global_name])
@lru_cache(64)
def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
    """Fill the module-level ``configs`` dict from the ``*.json`` config files.

    Does nothing when ``configs`` is already populated. Otherwise every
    ``*.json`` file in the configuration directory is parsed and stored under
    its file stem.

    Args:
        path_to_config_files: directory holding the config files; when falsy,
            ``get_homedir() / 'config'`` is used.

    Raises:
        ConfigError: the directory does not exist or is not a directory.
    """
    global configs
    if configs:
        # Already loaded, keep what we have.
        return

    if not path_to_config_files:
        config_dir = get_homedir() / 'config'
    elif isinstance(path_to_config_files, str):
        config_dir = Path(path_to_config_files)
    else:
        config_dir = path_to_config_files

    if not config_dir.exists():
        raise ConfigError(f'Configuration directory {config_dir} does not exists.')
    if not config_dir.is_dir():
        raise ConfigError(f'Configuration directory {config_dir} is not a directory.')

    configs = {}
    for json_file in config_dir.glob('*.json'):
        with json_file.open() as handle:
            configs[json_file.stem] = json.load(handle)
@lru_cache(64)
def get_config(config_type: str, entry: str, quiet: bool=False) -> Any:
    """Return ``entry`` from the ``config_type`` config, else from its sample file.

    The loaded configuration is consulted first (loading it on demand). When
    the config file or the entry is missing, the value is read from
    ``<home>/config/<config_type>.json.sample`` instead.

    Args:
        config_type: stem of the JSON config file to look into.
        entry: key to fetch from that config.
        quiet: when true, no warning is logged about the fallback.
    """
    if not configs:
        load_configs()

    if config_type not in configs:
        if not quiet:
            logger.warning(f'No {config_type} config file available.')
    elif entry in configs[config_type]:
        return configs[config_type][entry]
    elif not quiet:
        logger.warning(f'Unable to find {entry} in config file.')

    # Reaching this point means the value was not found in the real config.
    if not quiet:
        logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.')
    sample_file = get_homedir() / 'config' / f'{config_type}.json.sample'
    with sample_file.open() as handle:
        sample_config = json.load(handle)
    return sample_config[entry]
def safe_create_dir(to_create: Path) -> None:
    """Create ``to_create`` (and its parents) unless it already is a directory.

    Raises:
        CreateDirectoryException: the path exists but is not a directory.
    """
    occupied_by_non_dir = to_create.exists() and not to_create.is_dir()
    if occupied_by_non_dir:
        raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory')
    # exist_ok makes the call a no-op when the directory is already there.
    to_create.mkdir(parents=True, exist_ok=True)
def get_socket_path(name: str) -> str:
    """Return the socket path for ``name`` under the home directory, as a string.

    Known names are ``'cache'`` and ``'indexing'``; any other name raises
    ``KeyError``.
    """
    sockets = {
        'cache': Path('cache', 'cache.sock'),
        'indexing': Path('indexing', 'indexing.sock'),
    }
    # Resolve the home directory before the lookup, as the original does.
    home = get_homedir()
    return str(home / sockets[name])
def try_make_file(filename: Path):
    """Create ``filename`` if it does not exist yet.

    Returns:
        ``True`` when the file was created by this call, ``False`` when it
        already existed.
    """
    try:
        # exist_ok=False makes touch() fail instead of updating an existing file.
        filename.touch(exist_ok=False)
    except FileExistsError:
        return False
    else:
        return True

View File

@ -1,27 +1,13 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
class LookylooException(Exception):
pass
class MissingEnv(LookylooException):
pass
from .default import LookylooException
class NoValidHarFile(LookylooException):
pass
class CreateDirectoryException(LookylooException):
pass
class ConfigError(LookylooException):
pass
class MissingUUID(LookylooException):
pass

View File

@ -18,9 +18,8 @@ from publicsuffix2 import PublicSuffixList, fetch # type: ignore
from pytaxonomies import Taxonomies
from requests.exceptions import HTTPError
from .exceptions import ConfigError, CreateDirectoryException, MissingEnv
from .default import get_homedir, safe_create_dir, get_config
configs: Dict[str, Dict[str, Any]] = {}
logger = logging.getLogger('Lookyloo - Helpers')
@ -71,26 +70,6 @@ def get_public_suffix_list():
return psl
@lru_cache(64)
def get_homedir() -> Path:
if not os.environ.get('LOOKYLOO_HOME'):
# Try to open a .env file in the home directory if it exists.
if (Path(__file__).resolve().parent.parent / '.env').exists():
with (Path(__file__).resolve().parent.parent / '.env').open() as f:
for line in f:
key, value = line.strip().split('=', 1)
if value[0] in ['"', "'"]:
value = value[1:-1]
os.environ[key] = value
if not os.environ.get('LOOKYLOO_HOME'):
guessed_home = Path(__file__).resolve().parent.parent
raise MissingEnv(f"LOOKYLOO_HOME is missing. \
Run the following command (assuming you run the code from the clonned repository):\
export LOOKYLOO_HOME='{guessed_home}'")
return Path(os.environ['LOOKYLOO_HOME'])
@lru_cache(64)
def get_captures_dir() -> Path:
capture_dir = get_homedir() / 'scraped'
@ -104,66 +83,6 @@ def get_email_template() -> str:
return f.read()
@lru_cache(64)
def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
global configs
if configs:
return
if path_to_config_files:
if isinstance(path_to_config_files, str):
config_path = Path(path_to_config_files)
else:
config_path = path_to_config_files
else:
config_path = get_homedir() / 'config'
if not config_path.exists():
raise ConfigError(f'Configuration directory {config_path} does not exists.')
elif not config_path.is_dir():
raise ConfigError(f'Configuration directory {config_path} is not a directory.')
configs = {}
for path in config_path.glob('*.json'):
with path.open() as _c:
configs[path.stem] = json.load(_c)
@lru_cache(64)
def get_config(config_type: str, entry: str, quiet: bool=False) -> Any:
"""Get an entry from the given config_type file. Automatic fallback to the sample file"""
global configs
if not configs:
load_configs()
if config_type in configs:
if entry in configs[config_type]:
return configs[config_type][entry]
else:
if not quiet:
logger.warning(f'Unable to find {entry} in config file.')
else:
if not quiet:
logger.warning(f'No {config_type} config file available.')
if not quiet:
logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.')
with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c:
sample_config = json.load(_c)
return sample_config[entry]
def safe_create_dir(to_create: Path) -> None:
if to_create.exists() and not to_create.is_dir():
raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory')
to_create.mkdir(parents=True, exist_ok=True)
def get_socket_path(name: str) -> str:
mapping = {
'cache': Path('cache', 'cache.sock'),
'indexing': Path('indexing', 'indexing.sock'),
'storage': Path('storage', 'storage.sock'),
}
return str(get_homedir() / mapping[name])
def get_user_agents(directory: str='user_agents') -> Dict[str, Any]:
ua_files_path = sorted((get_homedir() / directory).glob('**/*.json'), reverse=True)
with ua_files_path[0].open() as f:
@ -223,14 +142,6 @@ def uniq_domains(uniq_urls):
return domains
def try_make_file(filename: Path):
try:
filename.touch(exist_ok=False)
return True
except FileExistsError:
return False
@lru_cache(64)
def get_useragent_for_requests():
version = pkg_resources.get_distribution('lookyloo').version

View File

@ -12,7 +12,8 @@ from har2tree import CrawledTree
from redis import ConnectionPool, Redis
from redis.connection import UnixDomainSocketConnection
from .helpers import get_public_suffix_list, get_socket_path, get_config
from .default import get_socket_path, get_config
from .helpers import get_public_suffix_list
class Indexing():

View File

@ -25,11 +25,11 @@ from werkzeug.useragents import UserAgent
from .capturecache import CaptureCache, CapturesIndex
from .context import Context
from .exceptions import (LookylooException, MissingCaptureDirectory,
from .default import LookylooException, get_homedir, get_config, get_socket_path
from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_config,
get_email_template, get_homedir, get_resources_hashes,
get_socket_path, get_splash_url, get_taxonomies, uniq_domains)
from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
get_resources_hashes, get_splash_url, get_taxonomies, uniq_domains)
from .indexing import Indexing
from .modules import (MISP, PhishingInitiative, UniversalWhois,
UrlScan, VirusTotal, Phishtank)

View File

@ -11,7 +11,8 @@ from har2tree import HostNode, URLNode, Har2TreeError
from pymisp import MISPAttribute, MISPEvent, PyMISP
from pymisp.tools import FileObject, URLObject
from ..helpers import get_config, get_homedir, get_public_suffix_list
from ..default import get_config, get_homedir
from ..helpers import get_public_suffix_list
if TYPE_CHECKING:
from ..capturecache import CaptureCache

View File

@ -10,8 +10,7 @@ from typing import Any, Dict, Optional, List
from har2tree import CrawledTree
from pyphishtanklookup import PhishtankLookup
from ..exceptions import ConfigError
from ..helpers import get_homedir
from ..default import ConfigError, get_homedir
# Note: stop doing requests 48 after the capture was initially done.

View File

@ -11,8 +11,7 @@ from typing import Any, Dict, Optional
from har2tree import CrawledTree
from pyeupi import PyEUPI
from ..exceptions import ConfigError
from ..helpers import get_homedir
from ..default import ConfigError, get_homedir
class PhishingInitiative():

View File

@ -8,7 +8,7 @@ from typing import Any, Dict, Iterable, List, Union
from pysanejs import SaneJS
from ..helpers import get_config, get_homedir
from ..default import get_config, get_homedir
class SaneJavaScript():

View File

@ -10,8 +10,8 @@ from typing import Any, Dict
import requests
from ..exceptions import ConfigError
from ..helpers import get_config, get_homedir, get_useragent_for_requests
from ..default import ConfigError, get_config, get_homedir
from ..helpers import get_useragent_for_requests
class UrlScan():

View File

@ -7,7 +7,7 @@ from typing import Any, Dict
from har2tree import CrawledTree, Har2TreeError, HostNode
from ..helpers import get_config
from ..default import get_config
class UniversalWhois():

View File

@ -12,8 +12,7 @@ import vt # type: ignore
from har2tree import CrawledTree
from vt.error import APIError # type: ignore
from ..exceptions import ConfigError
from ..helpers import get_homedir
from ..default import ConfigError, get_homedir
class VirusTotal():

View File

@ -3,7 +3,7 @@
import requests
from lookyloo.helpers import get_homedir
from lookyloo.default import get_homedir
d3js_version = '7'
datatables_version = "1.11.3"

View File

@ -6,7 +6,8 @@ from pathlib import Path
from redis import Redis
from lookyloo.helpers import get_captures_dir, safe_create_dir, get_socket_path
from lookyloo.default import safe_create_dir, get_socket_path
from lookyloo.helpers import get_captures_dir
def rename_captures():

View File

@ -7,7 +7,7 @@ import json
from typing import Dict
from lookyloo.helpers import get_homedir
from lookyloo.default import get_homedir
if __name__ == '__main__':
dest_dir = get_homedir() / 'website' / 'web'

View File

@ -14,7 +14,7 @@ try:
except ImportError:
HAS_CF = False
from lookyloo.helpers import get_homedir, safe_create_dir
from lookyloo.default import get_homedir, safe_create_dir
def update_user_agents() -> None:
@ -63,6 +63,7 @@ def ua_parser(html_content: str) -> Dict[str, Any]:
to_store['by_frequency'].append({'os': os, 'browser': browser, 'useragent': ua['useragent']})
return to_store
def main():
to_parse = Path('Most Common User Agents - Tech Blog (wh).html')
@ -77,5 +78,6 @@ def main():
with open(ua_file_name, 'w') as f:
json.dump(to_store, f, indent=2)
if __name__ == '__main__':
main()

View File

@ -11,8 +11,8 @@ from redis.exceptions import ConnectionError
from rich.console import Console
from rich.padding import Padding
from lookyloo.helpers import get_socket_path, splash_status
from lookyloo.abstractmanager import AbstractManager
from lookyloo.default import get_socket_path, AbstractManager
from lookyloo.helpers import splash_status
# NOTE: run with watch:
# watch --color tools/monitoring.py

View File

@ -5,7 +5,7 @@ import json
import logging
import argparse
from lookyloo.helpers import get_homedir
from lookyloo.default import get_homedir
def validate_generic_config_file():

View File

@ -21,8 +21,9 @@ from flask_restx import Api # type: ignore
from pymisp import MISPEvent, MISPServerError
from werkzeug.security import check_password_hash
from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.helpers import (CaptureStatus, get_config, get_taxonomies,
from lookyloo.helpers import (CaptureStatus, get_taxonomies,
get_user_agents, load_cookies, splash_status)
from lookyloo.lookyloo import Indexing, Lookyloo

View File

@ -11,7 +11,7 @@ from typing import Dict, List, Union
import flask_login # type: ignore
from werkzeug.security import generate_password_hash
from lookyloo.helpers import get_config, get_homedir
from lookyloo.default import get_config, get_homedir
def src_request_ip(request) -> str: