diff --git a/.gitignore b/.gitignore index 5b35b1e..1750393 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,11 @@ rawdata storage/ardb.pid storage/data storage/repl + +# Config file for the shadow server, with password +bgpranking/config/shadowserver.json +# The shadow server config files are dynamically generated +bgpranking/config/modules/shadowserver_*.json + +# Do not store the d3 lib in the repo +website/web/static/d3*.js diff --git a/bgpranking/abstractmanager.py b/bgpranking/abstractmanager.py index 3b9f91b..23a5376 100644 --- a/bgpranking/abstractmanager.py +++ b/bgpranking/abstractmanager.py @@ -10,6 +10,7 @@ from .libs.helpers import long_sleep, shutdown_requested class AbstractManager(ABC): def __init__(self, loglevel: int=logging.DEBUG): + self.loglevel = loglevel self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(loglevel) self.logger.info(f'Initializing {self.__class__.__name__}') diff --git a/bgpranking/config/modules/shadowserver_only.sh b/bgpranking/config/modules/shadowserver_only.sh new file mode 100755 index 0000000..f18d695 --- /dev/null +++ b/bgpranking/config/modules/shadowserver_only.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e +set -x + +find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete diff --git a/bgpranking/config/shadowserver.json.example b/bgpranking/config/shadowserver.json.example new file mode 100644 index 0000000..3d16693 --- /dev/null +++ b/bgpranking/config/shadowserver.json.example @@ -0,0 +1,4 @@ +{ + "user": "[USERNAME]", + "password": "[PASSWORD]" +} diff --git a/bgpranking/modulesfetcher.py b/bgpranking/modulesfetcher.py index b159500..4f8a8d7 100644 --- a/bgpranking/modulesfetcher.py +++ b/bgpranking/modulesfetcher.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import aiohttp @@ -112,10 +112,10 @@ class Fetcher(): self.logger.debug('New list, no hisorical files') # nothing has been downloaded ever, moving on return False + dl_hash = sha512(downloaded) for last_file in to_check: with last_file.open('rb') as f: last_hash = sha512(f.read()) - dl_hash = sha512(downloaded) if (dl_hash.digest() == last_hash.digest() and parser.parse(last_file.name.split('.')[0]).date() == date.today()): self.logger.debug('Same file already downloaded today.') diff --git a/bgpranking/parser.py b/bgpranking/parser.py index 9e9152c..cb71485 100644 --- a/bgpranking/parser.py +++ b/bgpranking/parser.py @@ -11,7 +11,7 @@ from uuid import uuid4 from io import BytesIO import importlib -from typing import List +from typing import List, Union, Tuple import types from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path @@ -20,7 +20,7 @@ from .libs.helpers import safe_create_dir, set_running, unset_running, get_socke class RawFilesParser(): def __init__(self, config_file: Path, storage_directory: Path, - loglevel: int=logging.DEBUG): + loglevel: int=logging.DEBUG) -> None: with open(config_file, 'r') as f: module_parameters = json.load(f) self.vendor = module_parameters['vendor'] @@ -36,7 +36,7 @@ class RawFilesParser(): self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0) self.logger.debug(f'Starting intake on {self.source}') - def __init_logger(self, loglevel): + def __init_logger(self, loglevel) -> None: self.logger = logging.getLogger(f'{self.__class__.__name__}-{self.vendor}-{self.listname}') self.logger.setLevel(loglevel) @@ -52,12 +52,12 @@ class RawFilesParser(): Only run it when needed, it is nasty and slow''' return 
['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips] - def parse_raw_file(self, f: BytesIO): + def parse_raw_file(self, f: BytesIO) -> List[bytes]: # If the list doesn't provide a time, fallback to current day, midnight self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) return self.extract_ipv4(f.getvalue()) - def parse_raw_files(self): + def parse_raw_files(self) -> None: set_running(f'{self.__class__.__name__}-{self.source}') nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()]) if nb_unparsable_files: @@ -69,9 +69,13 @@ class RawFilesParser(): to_parse = BytesIO(f.read()) p = self.redis_intake.pipeline() for ip in self.parse_raw_file(to_parse): + if isinstance(ip, tuple): + ip, datetime = ip + else: + datetime = self.datetime uuid = uuid4() p.hmset(uuid, {'ip': ip, 'source': self.source, - 'datetime': self.datetime.isoformat()}) + 'datetime': datetime.isoformat()}) p.sadd('intake', uuid) p.execute() self._archive(filepath) @@ -81,10 +85,10 @@ class RawFilesParser(): finally: unset_running(f'{self.__class__.__name__}-{self.source}') - def _archive(self, filepath: Path): + def _archive(self, filepath: Path) -> None: '''After processing, move file to the archive directory''' filepath.rename(self.directory / 'archive' / filepath.name) - def _unparsable(self, filepath: Path): + def _unparsable(self, filepath: Path) -> None: '''After processing, move file to the archive directory''' filepath.rename(self.unparsable_dir / filepath.name) diff --git a/bgpranking/parsers/abusech.py b/bgpranking/parsers/abusech.py index c4bede5..8cb256f 100644 --- a/bgpranking/parsers/abusech.py +++ b/bgpranking/parsers/abusech.py @@ -1,11 +1,13 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from dateutil.parser import parse import re from io import BytesIO +from typing import List -def parse_raw_file(self, f: BytesIO): + +def parse_raw_file(self, f: BytesIO) -> List[bytes]: self.datetime = parse(re.findall(b'# Generated on (.*)#\n', f.getvalue())[0]) return self.extract_ipv4(f.getvalue()) diff --git a/bgpranking/parsers/bambenekconsulting.py b/bgpranking/parsers/bambenekconsulting.py index 5815d69..01e51b3 100644 --- a/bgpranking/parsers/bambenekconsulting.py +++ b/bgpranking/parsers/bambenekconsulting.py @@ -1,11 +1,13 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from dateutil.parser import parse import re from io import BytesIO +from typing import List -def parse_raw_file(self, f: BytesIO): + +def parse_raw_file(self, f: BytesIO) -> List[bytes]: self.datetime = parse(re.findall(b'## Feed generated at: (.*)\n', f.getvalue())[0]) return self.extract_ipv4(f.getvalue()) diff --git a/bgpranking/parsers/default.py b/bgpranking/parsers/default.py deleted file mode 100644 index 11b22f7..0000000 --- a/bgpranking/parsers/default.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from io import BytesIO -from datetime import datetime - -from ..simple_feed_fetcher import RawFileImporter - - -class DefaultImporter(RawFileImporter): - - def parse_raw_file(self, f: BytesIO): - self.datetime = datetime.now() - return self.extract_ipv4(f.getvalue()) diff --git a/bgpranking/parsers/dshield.py b/bgpranking/parsers/dshield.py index d4ec687..6fe4d1e 100644 --- a/bgpranking/parsers/dshield.py +++ b/bgpranking/parsers/dshield.py @@ -4,9 +4,10 @@ from dateutil.parser import parse import re from io import BytesIO +from typing import List -def parse_raw_file(self, 
f: BytesIO): +def parse_raw_file(self, f: BytesIO) -> List[bytes]: self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0]) iplist = self.extract_ipv4(f.getvalue()) # The IPS have leading 0s. Getting tid of them directly here. diff --git a/bgpranking/parsers/malc0de.py b/bgpranking/parsers/malc0de.py index db2d217..340b82b 100644 --- a/bgpranking/parsers/malc0de.py +++ b/bgpranking/parsers/malc0de.py @@ -1,11 +1,13 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from dateutil.parser import parse import re from io import BytesIO +from typing import List -def parse_raw_file(self, f: BytesIO): + +def parse_raw_file(self, f: BytesIO) -> List[bytes]: self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0]) return self.extract_ipv4(f.getvalue()) diff --git a/bgpranking/parsers/nothink.py b/bgpranking/parsers/nothink.py index 3f3ef01..c4d14c3 100644 --- a/bgpranking/parsers/nothink.py +++ b/bgpranking/parsers/nothink.py @@ -1,11 +1,12 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from dateutil.parser import parse import re from io import BytesIO +from typing import List -def parse_raw_file(self, f: BytesIO): +def parse_raw_file(self, f: BytesIO) -> List[bytes]: self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0]) return self.extract_ipv4(f.getvalue()) diff --git a/bgpranking/parsers/shadowserver.py b/bgpranking/parsers/shadowserver.py new file mode 100644 index 0000000..51cda95 --- /dev/null +++ b/bgpranking/parsers/shadowserver.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from dateutil.parser import parse +from csv import DictReader +from io import BytesIO, StringIO +from typing import Tuple, Generator +from datetime import datetime + + +def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]: + default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + reader = DictReader(StringIO(f.getvalue().decode())) + for row in reader: + if 'timestamp' in row: + ts = parse(row['timestamp']) + else: + ts = default_ts + + if 'ip' in row: + ip = row['ip'] + elif 'src_ip' in row: + # For sinkhole6_http + ip = row['src_ip'] + else: + self.logger.critical(f'No IPs in the list {self.source}.') + break + yield ip, ts diff --git a/bgpranking/ranking.py b/bgpranking/ranking.py index 075ce7f..eed679e 100644 --- a/bgpranking/ranking.py +++ b/bgpranking/ranking.py @@ -4,7 +4,7 @@ import logging from redis import StrictRedis from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files -from datetime import date +from datetime import datetime, date, timedelta from ipaddress import ip_network from pathlib import Path @@ -22,40 +22,34 @@ class Ranking(): self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(loglevel) - def compute(self): - self.logger.info('Start ranking') - set_running(self.__class__.__name__) - today = date.today().isoformat() + def rank_a_day(self, day: str): + # FIXME: If we want to rank an older date, we need to have older datasets for the announcements v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last') - if not v4_last or not v6_last: - '''Failsafe if asn_meta has not been populated yet''' - unset_running(self.__class__.__name__) - return - asns_aggregation_key_v4 = f'{today}|asns|v4' - asns_aggregation_key_v6 = f'{today}|asns|v6' + asns_aggregation_key_v4 = f'{day}|asns|v4' + asns_aggregation_key_v6 = f'{day}|asns|v6' to_delete = 
set([asns_aggregation_key_v4, asns_aggregation_key_v6]) r_pipeline = self.ranking.pipeline() - for source in self.storage.smembers(f'{today}|sources'): - self.logger.info(f'{today} - Ranking source: {source}') - source_aggregation_key_v4 = f'{today}|{source}|asns|v4' - source_aggregation_key_v6 = f'{today}|{source}|asns|v6' + for source in self.storage.smembers(f'{day}|sources'): + self.logger.info(f'{day} - Ranking source: {source}') + source_aggregation_key_v4 = f'{day}|{source}|asns|v4' + source_aggregation_key_v6 = f'{day}|{source}|asns|v6' to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6]) - for asn in self.storage.smembers(f'{today}|{source}'): - prefixes_aggregation_key_v4 = f'{today}|{asn}|v4' - prefixes_aggregation_key_v6 = f'{today}|{asn}|v6' + for asn in self.storage.smembers(f'{day}|{source}'): + prefixes_aggregation_key_v4 = f'{day}|{asn}|v4' + prefixes_aggregation_key_v6 = f'{day}|{asn}|v6' to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6]) if asn == '0': # Default ASN when no matches. Probably spoofed. continue - self.logger.debug(f'{today} - Ranking source: {source} / ASN: {asn}') + self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}') asn_rank_v4 = 0.0 asn_rank_v6 = 0.0 - for prefix in self.storage.smembers(f'{today}|{source}|{asn}'): + for prefix in self.storage.smembers(f'{day}|{source}|{asn}'): ips = set([ip_ts.split('|')[0] - for ip_ts in self.storage.smembers(f'{today}|{source}|{asn}|{prefix}')]) + for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')]) py_prefix = ip_network(prefix) prefix_rank = float(len(ips)) / py_prefix.num_addresses - r_pipeline.zadd(f'{today}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix) + r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix) if py_prefix.version == 4: asn_rank_v4 += len(ips) * self.config_files[source]['impact'] r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix, prefix_rank * self.config_files[source]['impact']) @@ -66,17 +60,32 @@ class Ranking(): if v4count: asn_rank_v4 /= float(v4count) if asn_rank_v4: - r_pipeline.set(f'{today}|{source}|{asn}|v4', asn_rank_v4) + r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4) r_pipeline.zincrby(asns_aggregation_key_v4, asn, asn_rank_v4) r_pipeline.zadd(source_aggregation_key_v4, asn_rank_v4, asn) if v6count: asn_rank_v6 /= float(v6count) if asn_rank_v6: - r_pipeline.set(f'{today}|{source}|{asn}|v6', asn_rank_v6) + r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6) r_pipeline.zincrby(asns_aggregation_key_v6, asn, asn_rank_v6) r_pipeline.zadd(source_aggregation_key_v6, asn_rank_v4, asn) self.ranking.delete(*to_delete) r_pipeline.execute() + def compute(self): + self.logger.info('Start ranking') + set_running(self.__class__.__name__) + v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last') + if not v4_last or not v6_last: + '''Failsafe if asn_meta has not been populated yet''' + unset_running(self.__class__.__name__) + return + today = date.today() + now = datetime.now() + today12am = now.replace(hour=12, minute=0, second=0, microsecond=0) + if now < today12am: + # Compute yesterday and today's ranking (useful when we have lists generated only once a day) + self.rank_a_day((today - timedelta(days=1)).isoformat()) + self.rank_a_day(today.isoformat()) unset_running(self.__class__.__name__) self.logger.info('Ranking done.') diff --git a/bgpranking/shadowserverfetcher.py b/bgpranking/shadowserverfetcher.py new file mode 100644 index 
0000000..d9e43a3 --- /dev/null +++ b/bgpranking/shadowserverfetcher.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import aiohttp +import logging +from bs4 import BeautifulSoup +from dateutil.parser import parse +from typing import Tuple +from datetime import datetime, date +from pathlib import Path +from .libs.helpers import safe_create_dir, set_running, unset_running +import json + + +class ShadowServerFetcher(): + + def __init__(self, user, password, config_path_modules: Path, storage_directory: Path, + loglevel: int=logging.DEBUG) -> None: + self.__init_logger(loglevel) + self.storage_directory = storage_directory + self.config_path_modules = config_path_modules + self.user = user + self.password = password + self.index_page = 'https://dl.shadowserver.org/reports/index.php' + self.vendor = 'shadowserver' + self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone', + 'microsoft', 'scan', 'sinkhole6', 'sinkhole') + self.first_available_day = None + self.last_available_day = None + self.available_entries = {} + + def __init_logger(self, loglevel): + self.logger = logging.getLogger(f'{self.__class__.__name__}') + self.logger.setLevel(loglevel) + + async def __get_index(self): + auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'} + async with aiohttp.ClientSession() as s: + self.logger.debug('Fetching the index.') + async with s.post(self.index_page, data=auth_details) as r: + return await r.text() + + async def __build_daily_dict(self): + html_index = await self.__get_index() + soup = BeautifulSoup(html_index, 'html.parser') + treeview = soup.find(id='treemenu1') + for y in treeview.select('> li'): + year = y.contents[0] + for m in y.contents[1].select('> li'): + month = m.contents[0] + for d in m.contents[1].select('> li'): + day = d.contents[0] + date = parse(f'{year} {month} {day}').date() + self.available_entries[date.isoformat()] = [] + for a in d.contents[1].find_all('a', href=True): + if not self.first_available_day: + self.first_available_day = date + self.last_available_day = date + self.available_entries[date.isoformat()].append((a['href'], a.string)) + self.logger.debug('Dictionary created.') + + def __normalize_day(self, day: Tuple[str, date, datetime]=None) -> str: + if not day: + if not self.last_available_day: + raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first') + day = self.last_available_day + else: + if isinstance(day, str): + day = parse(day).date() + elif isinstance(day, datetime): + day = day.date() + return day.isoformat() + + def __split_name(self, name): + type_content, country, list_type = name.split('-') + if '_' in type_content: + type_content, details_type = type_content.split('_', maxsplit=1) + if '_' in details_type: + details_type, sub = details_type.split('_') + return list_type, country, (type_content, details_type, sub) + return list_type, country, (type_content, details_type) + return list_type, country, (type_content) + + def __check_config(self, filename: str) -> Path: + self.logger.debug(f'Working on config for {filename}.') + config = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'} + type_content, _, type_details = self.__split_name(filename) + prefix = type_content.split('.')[0] + config['name'] = '{}-{}'.format(prefix, '_'.join(type_details)) + + main_type = type_details[0] + if main_type not in self.known_list_types: + self.logger.warning(f'Unknown type: {main_type}. 
Please update the config creator script.') + return None + + if main_type == 'blacklist': + config['impact'] = 5 + elif main_type == 'botnet': + config['impact'] = 2 + elif main_type == 'cc': + config['impact'] = 5 + elif main_type == 'cisco': + config['impact'] = 3 + elif main_type == 'cwsandbox': + config['impact'] = 5 + elif main_type == 'drone': + config['impact'] = 2 + elif main_type == 'microsoft': + config['impact'] = 3 + elif main_type == 'scan': + config['impact'] = 1 + elif main_type == 'sinkhole6': + config['impact'] = 2 + elif main_type == 'sinkhole': + config['impact'] = 2 + if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists(): + self.logger.debug(f'Creating config file for {filename}.') + with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f: + json.dump(config, f, indent=2) + else: + with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f: + # Validate new config file with old + config_current = json.load(f) + if config_current != config: + self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current))) + # Init list directory + directory = self.storage_directory / config['vendor'] / config['name'] + safe_create_dir(directory) + meta = directory / 'meta' + safe_create_dir(meta) + archive_dir = directory / 'archive' + safe_create_dir(archive_dir) + self.logger.debug(f'Done with config for {filename}.') + return directory + + async def download_daily_entries(self, day: Tuple[str, date, datetime]=None): + set_running(f'{self.__class__.__name__}') + await self.__build_daily_dict() + for url, filename in self.available_entries[self.__normalize_day(day)]: + storage_dir = self.__check_config(filename) + if not storage_dir: + continue + # Check if the file we're trying to download has already been downloaded. Skip if True. 
+ uuid = url.split('/')[-1] + if (storage_dir / 'meta' / 'last_download').exists(): + with open(storage_dir / 'meta' / 'last_download') as f: + last_download_uuid = f.read() + if last_download_uuid == uuid: + self.logger.debug(f'Already downloaded: {url}.') + continue + async with aiohttp.ClientSession() as s: + async with s.get(url) as r: + self.logger.info(f'Downloading {url}.') + content = await r.content.read() + with (storage_dir / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f: + f.write(content) + with open(storage_dir / 'meta' / 'last_download', 'w') as f: + f.write(uuid) + unset_running(f'{self.__class__.__name__}') diff --git a/bin/archiver.py b/bin/archiver.py index 1917a43..87ca19a 100755 --- a/bin/archiver.py +++ b/bin/archiver.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from bgpranking.archive import DeepArchive diff --git a/bin/asn_descriptions.py b/bin/asn_descriptions.py index 19436e1..feedecf 100755 --- a/bin/asn_descriptions.py +++ b/bin/asn_descriptions.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging diff --git a/bin/dbinsert.py b/bin/dbinsert.py index 40f2e68..656671e 100755 --- a/bin/dbinsert.py +++ b/bin/dbinsert.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging diff --git a/bin/fetcher.py b/bin/fetcher.py index c87a1ef..d97e385 100755 --- a/bin/fetcher.py +++ b/bin/fetcher.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging @@ -21,20 +21,28 @@ class ModulesManager(AbstractManager): if not config_dir: config_dir = get_config_path() if not storage_directory: - storage_directory = get_homedir() / 'rawdata' - modules_config = config_dir / 'modules' - modules_paths = [modulepath for modulepath in modules_config.glob('*.json')] - self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths] + self.storage_directory = get_homedir() / 'rawdata' + self.modules_config = config_dir / 'modules' + self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')] + self.modules = [Fetcher(path, self.storage_directory, loglevel) for path in self.modules_paths] def _to_run_forever(self): - loop = asyncio.get_event_loop() - try: - loop.run_until_complete(asyncio.gather( - *[module.fetch_list() for module in self.modules if module.fetcher], - return_exceptions=True) - ) - except aiohttp.client_exceptions.ClientConnectorError as e: - self.logger.critical(f'Exception while fetching lists: {e}') + # Check if there are new config files + new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths] + self.modules += [Fetcher(path, self.storage_directory, self.loglevel) for path in new_modules_paths] + self.modules_paths += new_modules_paths + + if self.modules: + loop = asyncio.get_event_loop() + try: + loop.run_until_complete(asyncio.gather( + *[module.fetch_list() for module in self.modules if module.fetcher], + return_exceptions=True) + ) + except aiohttp.client_exceptions.ClientConnectorError as e: + self.logger.critical(f'Exception while fetching lists: {e}') + else: + self.logger.info('No config files were found so there are no fetchers running yet. 
Will try again later.') if __name__ == '__main__': diff --git a/bin/loadprefixes.py b/bin/loadprefixes.py index 5208b2a..c9c3334 100755 --- a/bin/loadprefixes.py +++ b/bin/loadprefixes.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging diff --git a/bin/monitor.py b/bin/monitor.py index 67b31ac..72f4ce5 100755 --- a/bin/monitor.py +++ b/bin/monitor.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from bgpranking.monitor import Monitor diff --git a/bin/parser.py b/bin/parser.py index b9e839c..981cacf 100755 --- a/bin/parser.py +++ b/bin/parser.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging @@ -19,13 +19,21 @@ class ParserManager(AbstractManager): if not config_dir: config_dir = get_config_path() if not storage_directory: - storage_directory = get_homedir() / 'rawdata' - modules_config = config_dir / 'modules' - modules_paths = [modulepath for modulepath in modules_config.glob('*.json')] - self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths] + self.storage_directory = get_homedir() / 'rawdata' + self.modules_config = config_dir / 'modules' + self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')] + self.modules = [RawFilesParser(path, self.storage_directory, loglevel) for path in self.modules_paths] def _to_run_forever(self): - [module.parse_raw_files() for module in self.modules] + # Check if there are new config files + new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths] + self.modules += [RawFilesParser(path, self.storage_directory, self.loglevel) for path in new_modules_paths] + self.modules_paths += new_modules_paths + + if self.modules: + [module.parse_raw_files() for module in self.modules] + else: + self.logger.warning('No config files were found so there are no parsers running yet. 
Will try again later.') if __name__ == '__main__': diff --git a/bin/ranking.py b/bin/ranking.py index 00330fa..27aa539 100755 --- a/bin/ranking.py +++ b/bin/ranking.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging diff --git a/bin/rislookup.py b/bin/rislookup.py index 39f4e91..c7aa968 100755 --- a/bin/rislookup.py +++ b/bin/rislookup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging diff --git a/bin/run_backend.py b/bin/run_backend.py index 75ed348..de08f9f 100755 --- a/bin/run_backend.py +++ b/bin/run_backend.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from bgpranking.libs.helpers import get_homedir, check_running diff --git a/bin/sanitizer.py b/bin/sanitizer.py index 21a6891..ba0e46e 100755 --- a/bin/sanitizer.py +++ b/bin/sanitizer.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import logging diff --git a/bin/shutdown.py b/bin/shutdown.py index 5454851..c64919f 100755 --- a/bin/shutdown.py +++ b/bin/shutdown.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from bgpranking.libs.helpers import is_running, get_socket_path diff --git a/bin/ssfetcher.py b/bin/ssfetcher.py new file mode 100755 index 0000000..c5a657d --- /dev/null +++ b/bin/ssfetcher.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import logging +import json +import asyncio +from pathlib import Path +import aiohttp + +from bgpranking.abstractmanager import AbstractManager +from bgpranking.shadowserverfetcher import ShadowServerFetcher +from bgpranking.libs.helpers import get_config_path, get_homedir + +logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', + level=logging.INFO, datefmt='%I:%M:%S') + + +class ShadowServerManager(AbstractManager): + + def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG): + super().__init__(loglevel) + self.config = True + if not config_dir: + config_dir = get_config_path() + if not (config_dir / 'shadowserver.json').exists(): + self.config = False + self.logger.warning(f'No config file available, the shadow server module will not be launched.') + return + with open(config_dir / 'shadowserver.json') as f: + ss_config = json.load(f) + if not storage_directory: + storage_directory = get_homedir() / 'rawdata' + modules_config = config_dir / 'modules' + self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], modules_config, storage_directory, loglevel) + + def _to_run_forever(self): + loop = asyncio.get_event_loop() + try: + loop.run_until_complete(self.fetcher.download_daily_entries()) + except aiohttp.client_exceptions.ClientConnectorError as e: + self.logger.critical(f'Exception while fetching Shadow Server lists: {e}') + + +if __name__ == '__main__': + modules_manager = ShadowServerManager() + if modules_manager.config: + modules_manager.run(sleep_in_sec=3600) diff --git a/bin/start.py b/bin/start.py index b9c99ee..b546102 100755 --- a/bin/start.py +++ b/bin/start.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from subprocess import Popen @@ -13,6 +13,7 @@ if __name__ == '__main__': Popen(['loadprefixes.py']) Popen(['rislookup.py']) Popen(['fetcher.py']) + Popen(['ssfetcher.py']) Popen(['parser.py']) Popen(['sanitizer.py']) Popen(['dbinsert.py']) diff --git a/bin/stop.py b/bin/stop.py index 
c0c5a9c..58d01e9 100755 --- a/bin/stop.py +++ b/bin/stop.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from subprocess import Popen diff --git a/setup.py b/setup.py index 2b3db44..56748bc 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup( description='BGP Ranking, the new one..', packages=['bgpranking'], scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py', - 'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', + 'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py', 'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'], classifiers=[ 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',