204 lines
8.5 KiB
Python
Executable File
204 lines
8.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import logging
|
|
from logging import Logger
|
|
import json
|
|
import asyncio
|
|
|
|
from typing import Tuple, Dict, List, Optional, TypeVar, Any
|
|
from datetime import datetime, date
|
|
from pathlib import Path
|
|
|
|
import aiohttp
|
|
from bs4 import BeautifulSoup # type: ignore
|
|
from dateutil.parser import parse
|
|
|
|
from bgpranking.default import AbstractManager, get_homedir, safe_create_dir
|
|
from bgpranking.helpers import get_data_dir, get_modules_dir
|
|
|
|
|
|
# Process-wide logging setup: timestamp, logger name, level, then the message.
logging.basicConfig(
    format='%(asctime)s %(name)s %(levelname)s:%(message)s',
    level=logging.INFO,
)

# Type of the values accepted wherever a "day" is expected:
# a datetime, a date, or a string.
Dates = TypeVar('Dates', datetime, date, str)
|
|
|
|
|
|
class ShadowServerFetcher():
    """Fetch the daily ShadowServer reports and maintain the matching module configs.

    The fetcher logs into the ShadowServer report index, builds a mapping of
    every listed day to the reports published on that day, and downloads the
    reports of one day into per-report storage directories. A JSON module
    config is created for each report the first time it is seen.
    """

    # Impact written into the generated module config for each report type.
    # Known report types that are not listed here get ``_DEFAULT_IMPACT``.
    _IMPACT_BY_TYPE: Dict[str, int] = {
        'blacklist': 5,
        'botnet': 2,
        'cc': 5,
        'cisco': 3,
        'cwsandbox': 5,
        'drone': 2,
        'microsoft': 3,
        'scan': 1,
        'sinkhole6': 2,
        'sinkhole': 2,
    }
    _DEFAULT_IMPACT = 1

    def __init__(self, user, password, logger: Logger) -> None:
        """Store the credentials and initialise an empty index.

        :param user: login sent to the report index page.
        :param password: password sent to the report index page.
        :param logger: logger used for every message emitted by the fetcher.
        """
        self.logger = logger
        self.storage_directory = get_data_dir()
        self.config_path_modules = get_modules_dir()
        self.user = user
        self.password = password
        self.index_page = 'https://dl.shadowserver.org/reports/index.php'
        self.vendor = 'shadowserver'
        self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone',
                                 'microsoft', 'scan', 'sinkhole6', 'sinkhole', 'outdated',
                                 'compromised', 'hp', 'darknet', 'ddos')
        # Both must be real attributes: a bare annotation does not create the
        # attribute, and they are read before the first assignment.
        self.first_available_day: Optional[date] = None
        self.last_available_day: Optional[date] = None
        # ISO day -> list of (download URL, link text) for that day.
        self.available_entries: Dict[str, List[Tuple[str, str]]] = {}

    async def __get_index(self):
        """POST the credentials to the index page and return the response body as text."""
        auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
        async with aiohttp.ClientSession() as s:
            self.logger.debug('Fetching the index.')
            async with s.post(self.index_page, data=auth_details) as r:
                return await r.text()

    async def __build_daily_dict(self):
        """Parse the index page and (re)fill ``available_entries``.

        The element with id ``treemenu1`` is walked as nested lists of
        year > month > day; every link found under a day is recorded as
        ``(href, link text)``. ``first_available_day`` is set once, to the
        first day that has a link, and ``last_available_day`` ends up as the
        last day with a link.

        :raises Exception: if the page has no ``treemenu1`` element.
        """
        html_index = await self.__get_index()
        soup = BeautifulSoup(html_index, 'html.parser')
        treeview = soup.find(id='treemenu1')
        if treeview is None:
            # Without this check the failure is an opaque AttributeError on None.
            raise Exception(f'Unable to find the report tree (id "treemenu1") in {self.index_page}.')
        for y in treeview.select(':scope > li'):
            year = y.contents[0]
            for m in y.contents[1].select(':scope > li'):
                month = m.contents[0]
                for d in m.contents[1].select(':scope > li'):
                    day = d.contents[0]
                    # Named current_day so the imported ``date`` class is not shadowed.
                    current_day = parse(f'{year} {month} {day}').date()
                    day_key = current_day.isoformat()
                    self.available_entries[day_key] = []
                    for a in d.contents[1].find_all('a', href=True):
                        if not self.first_available_day:
                            self.first_available_day = current_day
                        self.last_available_day = current_day
                        self.available_entries[day_key].append((a['href'], a.string))
        self.logger.debug('Dictionary created.')

    def __normalize_day(self, day: Optional[Dates]=None) -> str:
        """Return *day* as an ISO formatted date string.

        :param day: a string (parsed with dateutil), a datetime, a date, or a
            falsy value to use the last available day.
        :raises Exception: if no day is given and the index was not built yet.
        :raises TypeError: if *day* is of an unsupported type.
        """
        if not day:
            if not self.last_available_day:
                raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
            to_return = self.last_available_day
        elif isinstance(day, str):
            to_return = parse(day).date()
        elif isinstance(day, datetime):
            # Checked before ``date``: datetime is a subclass of date.
            to_return = day.date()
        elif isinstance(day, date):
            to_return = day
        else:
            raise TypeError(f'Unsupported type for day: {type(day).__name__}')
        return to_return.isoformat()

    def __split_name(self, name):
        """Split a report name of the form ``<type>-<country>-<list type>``.

        The ``<type>`` part is itself split on up to two underscores.

        :returns: ``(list_type, country, type_details)`` where ``type_details``
            is a plain string when ``<type>`` has no underscore, and a tuple
            of two or three strings otherwise.
        :raises ValueError: if *name* does not have exactly three
            dash-separated parts.
        """
        type_content, country, list_type = name.split('-')
        if '_' not in type_content:
            # A bare string, not a 1-tuple: __check_config relies on that.
            return list_type, country, type_content
        type_content, details_type = type_content.split('_', maxsplit=1)
        if '_' in details_type:
            details_type, sub = details_type.split('_', maxsplit=1)
            return list_type, country, (type_content, details_type, sub)
        return list_type, country, (type_content, details_type)

    def __check_config(self, filename: str) -> Optional[Path]:
        """Make sure a module config and the storage directories exist for a report.

        A config file is written if none exists yet; otherwise the one on
        disk is compared with the freshly generated one and a warning is
        logged when they differ (the file on disk is left untouched).

        :param filename: report name as listed in the index.
        :returns: the storage directory of the report, or ``None`` when the
            report type is not one of ``known_list_types``.
        """
        self.logger.debug(f'Working on config for {filename}.')
        config: Dict[str, Any] = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
        type_content, _, type_details = self.__split_name(filename)
        prefix = type_content.split('.')[0]

        if isinstance(type_details, str):
            main_type = type_details
            config['name'] = '{}-{}'.format(prefix, type_details)
        else:
            main_type = type_details[0]
            config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))

        if main_type not in self.known_list_types:
            self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
            return None

        config['impact'] = self._IMPACT_BY_TYPE.get(main_type, self._DEFAULT_IMPACT)

        config_file = self.config_path_modules / f"{config['vendor']}_{config['name']}.json"
        if not config_file.exists():
            self.logger.debug(f'Creating config file for {filename}.')
            with open(config_file, 'w') as f:
                json.dump(config, f, indent=2)
        else:
            with open(config_file, 'r') as f:
                # Validate new config file with old
                config_current = json.load(f)
            if config_current != config:
                self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))

        # Init list directory
        directory = self.storage_directory / config['vendor'] / config['name']
        safe_create_dir(directory)
        meta = directory / 'meta'
        safe_create_dir(meta)
        archive_dir = directory / 'archive'
        safe_create_dir(archive_dir)
        self.logger.debug(f'Done with config for {filename}.')
        return directory

    async def download_daily_entries(self, day: Optional[Dates]=None):
        """Download every report listed for *day* that was not fetched already.

        The index is rebuilt on each call. The last path component of a
        report URL is stored in ``meta/last_download`` after a download, and
        a report whose identifier matches the stored one is skipped.

        :param day: day to download; defaults to the last available day.
        """
        await self.__build_daily_dict()
        wanted_day = self.__normalize_day(day)
        entries = self.available_entries.get(wanted_day)
        if entries is None:
            # A day absent from the index is reported instead of raising KeyError.
            self.logger.warning(f'No entries available for {wanted_day}.')
            return
        for url, filename in entries:
            storage_dir = self.__check_config(filename)
            if not storage_dir:
                continue
            # Check if the file we're trying to download has already been downloaded. Skip if True.
            uuid = url.split('/')[-1]
            last_download = storage_dir / 'meta' / 'last_download'
            if last_download.exists():
                with open(last_download) as _fr:
                    last_download_uuid = _fr.read()
                if last_download_uuid == uuid:
                    self.logger.debug(f'Already downloaded: {url}.')
                    continue
            async with aiohttp.ClientSession() as s:
                async with s.get(url) as r:
                    self.logger.info(f'Downloading {url}.')
                    content = await r.content.read()
            with (storage_dir / f'{datetime.now().isoformat()}.txt').open('wb') as _fw:
                _fw.write(content)
            with last_download.open('w') as _fwt:
                _fwt.write(uuid)
|
|
|
|
|
|
class ShadowServerManager(AbstractManager):
    """Manager wrapping a ShadowServerFetcher built from ``config/shadowserver.json``.

    ``self.config`` tells whether the credentials file was found; the fetcher
    is only created when it was.
    """

    def __init__(self, loglevel: int=logging.INFO):
        """Load the ShadowServer credentials and create the fetcher.

        :param loglevel: log level handed to the parent manager.
        """
        super().__init__(loglevel)
        self.script_name = 'shadowserver_fetcher'
        credentials_path = get_homedir() / 'config' / 'shadowserver.json'
        self.config = credentials_path.exists()
        if not self.config:
            # No credentials: leave the manager without a fetcher.
            self.logger.warning(f'No config file available {credentials_path}, the shadow server module will not be launched.')
            return
        with credentials_path.open() as credentials_file:
            credentials = json.load(credentials_file)
        self.fetcher = ShadowServerFetcher(credentials['user'], credentials['password'], self.logger)

    async def _to_run_forever_async(self):
        """Download the reports of the last available day."""
        # NOTE(review): presumably invoked periodically by AbstractManager.run_async - confirm.
        await self.fetcher.download_daily_entries()
|
|
|
|
|
|
def main():
    """Start the ShadowServer manager, unless its config file is missing."""
    manager = ShadowServerManager()
    if not manager.config:
        # The manager already logged a warning about the missing config file.
        return
    asyncio.run(manager.run_async(sleep_in_sec=3600))
|
|
|
|
|
|
if __name__ == '__main__':
    # Only launch the fetcher when the file is executed as a script.
    main()
|