new: Add Shadowserver module

pull/12/head
Raphaël Vinot 2018-07-13 14:51:00 +02:00
parent faa3c634f2
commit 6045635b72
31 changed files with 366 additions and 89 deletions

.gitignore

@@ -110,3 +110,11 @@ rawdata
 storage/ardb.pid
 storage/data
 storage/repl
+# Config file for Shadowserver, contains the password
+bgpranking/config/shadowserver.json
+# The Shadowserver module config files are dynamically generated
+bgpranking/config/modules/shadowserver_*.json
+# Do not store the d3 lib in the repo
+website/web/static/d3*.js


@@ -10,6 +10,7 @@ from .libs.helpers import long_sleep, shutdown_requested
 class AbstractManager(ABC):
     def __init__(self, loglevel: int=logging.DEBUG):
+        self.loglevel = loglevel
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
         self.logger.info(f'Initializing {self.__class__.__name__}')


@@ -0,0 +1,6 @@
#!/bin/bash
set -e
set -x
find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete


@@ -0,0 +1,4 @@
{
"user": "[USERNAME]",
"password": "[PASSWORD]"
}
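This template only carries the two credential fields; bin/ssfetcher.py (further down) loads it with json.load and hands the values to the fetcher. A minimal sketch of that loading step, with an assumed repository-relative path:

#!/usr/bin/env python3
# Sketch: load the credentials template the way bin/ssfetcher.py does.
# The path below is an assumption for illustration; adjust to your checkout.
import json
from pathlib import Path

config_file = Path('bgpranking') / 'config' / 'shadowserver.json'
with open(config_file) as f:
    ss_config = json.load(f)

# The two keys defined in the template above.
user = ss_config['user']
password = ss_config['password']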


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import aiohttp
@@ -112,10 +112,10 @@ class Fetcher():
             self.logger.debug('New list, no historical files')
             # nothing has been downloaded ever, moving on
             return False
+        dl_hash = sha512(downloaded)
         for last_file in to_check:
             with last_file.open('rb') as f:
                 last_hash = sha512(f.read())
-            dl_hash = sha512(downloaded)
             if (dl_hash.digest() == last_hash.digest() and
                     parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                 self.logger.debug('Same file already downloaded today.')
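The change above computes the sha512 of the freshly downloaded payload once, before the loop, instead of re-hashing it for every archived file. A standalone sketch of the same dedup check, with an illustrative archive layout and names that are not part of this changeset:

#!/usr/bin/env python3
# Sketch: hash the new payload once, then compare it against each archived file.
from datetime import date
from hashlib import sha512
from pathlib import Path

from dateutil import parser


def already_downloaded_today(downloaded: bytes, archive_dir: Path) -> bool:
    dl_hash = sha512(downloaded)  # computed once, outside the loop
    for last_file in sorted(archive_dir.glob('*.txt')):
        last_hash = sha512(last_file.read_bytes())
        # File names are assumed to start with an ISO timestamp, as in the fetcher.
        if (dl_hash.digest() == last_hash.digest()
                and parser.parse(last_file.name.split('.')[0]).date() == date.today()):
            return True
    return False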


@@ -11,7 +11,7 @@ from uuid import uuid4
 from io import BytesIO
 import importlib
-from typing import List
+from typing import List, Union, Tuple
 import types
 from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path
@@ -20,7 +20,7 @@ from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path
 class RawFilesParser():
     def __init__(self, config_file: Path, storage_directory: Path,
-                 loglevel: int=logging.DEBUG):
+                 loglevel: int=logging.DEBUG) -> None:
         with open(config_file, 'r') as f:
             module_parameters = json.load(f)
         self.vendor = module_parameters['vendor']
@@ -36,7 +36,7 @@ class RawFilesParser():
         self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0)
         self.logger.debug(f'Starting intake on {self.source}')
-    def __init_logger(self, loglevel):
+    def __init_logger(self, loglevel) -> None:
         self.logger = logging.getLogger(f'{self.__class__.__name__}-{self.vendor}-{self.listname}')
         self.logger.setLevel(loglevel)
@@ -52,12 +52,12 @@ class RawFilesParser():
         Only run it when needed, it is nasty and slow'''
         return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]
-    def parse_raw_file(self, f: BytesIO):
+    def parse_raw_file(self, f: BytesIO) -> List[bytes]:
         # If the list doesn't provide a time, fall back to the current day, midnight
         self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
         return self.extract_ipv4(f.getvalue())
-    def parse_raw_files(self):
+    def parse_raw_files(self) -> None:
         set_running(f'{self.__class__.__name__}-{self.source}')
         nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()])
         if nb_unparsable_files:
@@ -69,9 +69,13 @@ class RawFilesParser():
                     to_parse = BytesIO(f.read())
                 p = self.redis_intake.pipeline()
                 for ip in self.parse_raw_file(to_parse):
+                    if isinstance(ip, tuple):
+                        ip, datetime = ip
+                    else:
+                        datetime = self.datetime
                     uuid = uuid4()
                     p.hmset(uuid, {'ip': ip, 'source': self.source,
-                                   'datetime': self.datetime.isoformat()})
+                                   'datetime': datetime.isoformat()})
                     p.sadd('intake', uuid)
                 p.execute()
                 self._archive(filepath)
@@ -81,10 +85,10 @@ class RawFilesParser():
             finally:
                 unset_running(f'{self.__class__.__name__}-{self.source}')
-    def _archive(self, filepath: Path):
+    def _archive(self, filepath: Path) -> None:
         '''After processing, move file to the archive directory'''
         filepath.rename(self.directory / 'archive' / filepath.name)
-    def _unparsable(self, filepath: Path):
+    def _unparsable(self, filepath: Path) -> None:
         '''After processing, move file to the unparsable directory'''
         filepath.rename(self.unparsable_dir / filepath.name)
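The intake loop now accepts either plain IPs or (ip, timestamp) tuples from parse_raw_file, so parsers that carry per-row timestamps (like the Shadowserver CSV parser below) can override the module-level fallback. An isolated sketch of that normalisation, with made-up sample entries:

#!/usr/bin/env python3
# Sketch: a parser may yield plain IPs (bytes/str) or (ip, timestamp) tuples.
from datetime import datetime

fallback = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
entries = [b'198.51.100.7', ('203.0.113.12', datetime(2018, 7, 13, 10, 0))]

for entry in entries:
    if isinstance(entry, tuple):
        ip, when = entry
    else:
        ip, when = entry, fallback
    print(ip, when.isoformat())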


@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# Generated on (.*)#\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())


@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'## Feed generated at: (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())


@@ -1,14 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from io import BytesIO
from datetime import datetime
from ..simple_feed_fetcher import RawFileImporter
class DefaultImporter(RawFileImporter):
def parse_raw_file(self, f: BytesIO):
self.datetime = datetime.now()
return self.extract_ipv4(f.getvalue())


@@ -4,9 +4,10 @@
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0])
     iplist = self.extract_ipv4(f.getvalue())
     # The IPs have leading 0s. Getting rid of them directly here.


@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())


@@ -1,11 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())


@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from dateutil.parser import parse
from csv import DictReader
from io import BytesIO, StringIO
from typing import Tuple, Generator
from datetime import datetime
def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]:
default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
reader = DictReader(StringIO(f.getvalue().decode()))
for row in reader:
if 'timestamp' in row:
ts = parse(row['timestamp'])
else:
ts = default_ts
if 'ip' in row:
ip = row['ip']
elif 'src_ip' in row:
# For sinkhole6_http
ip = row['src_ip']
else:
self.logger.critical(f'No IPs in the list {self.source}.')
break
yield ip, ts
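A quick sketch of what this generator yields for a Shadowserver-style CSV; the column names (timestamp, ip) come from the code above, the sample rows are invented:

#!/usr/bin/env python3
# Sketch: feed an in-memory CSV through the same DictReader logic.
from csv import DictReader
from io import StringIO
from dateutil.parser import parse

sample = (
    'timestamp,ip,port\n'
    '2018-07-13 00:00:01,198.51.100.7,23\n'
    '2018-07-13 00:00:02,203.0.113.12,445\n'
)

for row in DictReader(StringIO(sample)):
    ts = parse(row['timestamp'])
    print(row['ip'], ts.isoformat())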


@@ -4,7 +4,7 @@
 import logging
 from redis import StrictRedis
 from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files
-from datetime import date
+from datetime import datetime, date, timedelta
 from ipaddress import ip_network
 from pathlib import Path
@@ -22,40 +22,34 @@ class Ranking():
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
-    def compute(self):
-        self.logger.info('Start ranking')
-        set_running(self.__class__.__name__)
-        today = date.today().isoformat()
+    def rank_a_day(self, day: str):
+        # FIXME: If we want to rank an older date, we need to have older datasets for the announcements
         v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
-        if not v4_last or not v6_last:
-            '''Failsafe if asn_meta has not been populated yet'''
-            unset_running(self.__class__.__name__)
-            return
-        asns_aggregation_key_v4 = f'{today}|asns|v4'
-        asns_aggregation_key_v6 = f'{today}|asns|v6'
+        asns_aggregation_key_v4 = f'{day}|asns|v4'
+        asns_aggregation_key_v6 = f'{day}|asns|v6'
         to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6])
         r_pipeline = self.ranking.pipeline()
-        for source in self.storage.smembers(f'{today}|sources'):
-            self.logger.info(f'{today} - Ranking source: {source}')
-            source_aggregation_key_v4 = f'{today}|{source}|asns|v4'
-            source_aggregation_key_v6 = f'{today}|{source}|asns|v6'
+        for source in self.storage.smembers(f'{day}|sources'):
+            self.logger.info(f'{day} - Ranking source: {source}')
+            source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
+            source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
             to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
-            for asn in self.storage.smembers(f'{today}|{source}'):
-                prefixes_aggregation_key_v4 = f'{today}|{asn}|v4'
-                prefixes_aggregation_key_v6 = f'{today}|{asn}|v6'
+            for asn in self.storage.smembers(f'{day}|{source}'):
+                prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
+                prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
                 to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
                 if asn == '0':
                     # Default ASN when no matches. Probably spoofed.
                     continue
-                self.logger.debug(f'{today} - Ranking source: {source} / ASN: {asn}')
+                self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
                 asn_rank_v4 = 0.0
                 asn_rank_v6 = 0.0
-                for prefix in self.storage.smembers(f'{today}|{source}|{asn}'):
+                for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
                     ips = set([ip_ts.split('|')[0]
-                               for ip_ts in self.storage.smembers(f'{today}|{source}|{asn}|{prefix}')])
+                               for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')])
                     py_prefix = ip_network(prefix)
                     prefix_rank = float(len(ips)) / py_prefix.num_addresses
-                    r_pipeline.zadd(f'{today}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
+                    r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                     if py_prefix.version == 4:
                         asn_rank_v4 += len(ips) * self.config_files[source]['impact']
                         r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix, prefix_rank * self.config_files[source]['impact'])
@@ -66,17 +60,32 @@ class Ranking():
                 if v4count:
                     asn_rank_v4 /= float(v4count)
                     if asn_rank_v4:
-                        r_pipeline.set(f'{today}|{source}|{asn}|v4', asn_rank_v4)
+                        r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
                         r_pipeline.zincrby(asns_aggregation_key_v4, asn, asn_rank_v4)
                         r_pipeline.zadd(source_aggregation_key_v4, asn_rank_v4, asn)
                 if v6count:
                     asn_rank_v6 /= float(v6count)
                     if asn_rank_v6:
-                        r_pipeline.set(f'{today}|{source}|{asn}|v6', asn_rank_v6)
+                        r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
                         r_pipeline.zincrby(asns_aggregation_key_v6, asn, asn_rank_v6)
                         r_pipeline.zadd(source_aggregation_key_v6, asn_rank_v4, asn)
         self.ranking.delete(*to_delete)
         r_pipeline.execute()
+    def compute(self):
+        self.logger.info('Start ranking')
+        set_running(self.__class__.__name__)
+        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
+        if not v4_last or not v6_last:
+            '''Failsafe if asn_meta has not been populated yet'''
+            unset_running(self.__class__.__name__)
+            return
+        today = date.today()
+        now = datetime.now()
+        today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
+        if now < today12am:
+            # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
+            self.rank_a_day((today - timedelta(days=1)).isoformat())
+        self.rank_a_day(today.isoformat())
         unset_running(self.__class__.__name__)
         self.logger.info('Ranking done.')
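A worked example of the scoring used in rank_a_day, with made-up numbers: prefix_rank is the fraction of a prefix's addresses reported by a source, and the per-ASN rank accumulates len(ips) * impact per prefix before being divided by the ASN's prefix count (v4count, which is defined outside this hunk):

#!/usr/bin/env python3
# Sketch: 10 addresses reported out of a /24, for a source with impact 5.
from ipaddress import ip_network

ips = {f'198.51.100.{i}' for i in range(10)}   # 10 distinct IPs (sample data)
py_prefix = ip_network('198.51.100.0/24')      # 256 addresses
impact = 5                                     # e.g. the 'blacklist' impact defined below

prefix_rank = float(len(ips)) / py_prefix.num_addresses   # 10 / 256, about 0.039
asn_rank_v4 = len(ips) * impact                            # 50.0 before normalisation

v4count = 2   # pretend the ASN announces 2 IPv4 prefixes that day (assumption)
asn_rank_v4 /= float(v4count)                              # 25.0
print(prefix_rank, asn_rank_v4)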


@@ -0,0 +1,159 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import aiohttp
import logging
from bs4 import BeautifulSoup
from dateutil.parser import parse
from typing import Tuple
from datetime import datetime, date
from pathlib import Path
from .libs.helpers import safe_create_dir, set_running, unset_running
import json
class ShadowServerFetcher():
def __init__(self, user, password, config_path_modules: Path, storage_directory: Path,
loglevel: int=logging.DEBUG) -> None:
self.__init_logger(loglevel)
self.storage_directory = storage_directory
self.config_path_modules = config_path_modules
self.user = user
self.password = password
self.index_page = 'https://dl.shadowserver.org/reports/index.php'
self.vendor = 'shadowserver'
self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone',
'microsoft', 'scan', 'sinkhole6', 'sinkhole')
self.first_available_day = None
self.last_available_day = None
self.available_entries = {}
def __init_logger(self, loglevel):
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(loglevel)
async def __get_index(self):
auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
async with aiohttp.ClientSession() as s:
self.logger.debug('Fetching the index.')
async with s.post(self.index_page, data=auth_details) as r:
return await r.text()
async def __build_daily_dict(self):
html_index = await self.__get_index()
soup = BeautifulSoup(html_index, 'html.parser')
treeview = soup.find(id='treemenu1')
for y in treeview.select('> li'):
year = y.contents[0]
for m in y.contents[1].select('> li'):
month = m.contents[0]
for d in m.contents[1].select('> li'):
day = d.contents[0]
date = parse(f'{year} {month} {day}').date()
self.available_entries[date.isoformat()] = []
for a in d.contents[1].find_all('a', href=True):
if not self.first_available_day:
self.first_available_day = date
self.last_available_day = date
self.available_entries[date.isoformat()].append((a['href'], a.string))
self.logger.debug('Dictionary created.')
def __normalize_day(self, day: Tuple[str, date, datetime]=None) -> str:
if not day:
if not self.last_available_day:
raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
day = self.last_available_day
else:
if isinstance(day, str):
day = parse(day).date()
elif isinstance(day, datetime):
day = day.date()
return day.isoformat()
def __split_name(self, name):
type_content, country, list_type = name.split('-')
if '_' in type_content:
type_content, details_type = type_content.split('_', maxsplit=1)
if '_' in details_type:
details_type, sub = details_type.split('_')
return list_type, country, (type_content, details_type, sub)
return list_type, country, (type_content, details_type)
        return list_type, country, (type_content, )
def __check_config(self, filename: str) -> Path:
self.logger.debug(f'Working on config for {filename}.')
config = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
type_content, _, type_details = self.__split_name(filename)
prefix = type_content.split('.')[0]
config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))
main_type = type_details[0]
if main_type not in self.known_list_types:
self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
return None
if main_type == 'blacklist':
config['impact'] = 5
elif main_type == 'botnet':
config['impact'] = 2
elif main_type == 'cc':
config['impact'] = 5
elif main_type == 'cisco':
config['impact'] = 3
elif main_type == 'cwsandbox':
config['impact'] = 5
elif main_type == 'drone':
config['impact'] = 2
elif main_type == 'microsoft':
config['impact'] = 3
elif main_type == 'scan':
config['impact'] = 1
elif main_type == 'sinkhole6':
config['impact'] = 2
elif main_type == 'sinkhole':
config['impact'] = 2
if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists():
self.logger.debug(f'Creating config file for {filename}.')
with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f:
json.dump(config, f, indent=2)
else:
with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f:
# Validate new config file with old
config_current = json.load(f)
if config_current != config:
self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))
# Init list directory
directory = self.storage_directory / config['vendor'] / config['name']
safe_create_dir(directory)
meta = directory / 'meta'
safe_create_dir(meta)
archive_dir = directory / 'archive'
safe_create_dir(archive_dir)
self.logger.debug(f'Done with config for {filename}.')
return directory
async def download_daily_entries(self, day: Tuple[str, date, datetime]=None):
set_running(f'{self.__class__.__name__}')
await self.__build_daily_dict()
for url, filename in self.available_entries[self.__normalize_day(day)]:
storage_dir = self.__check_config(filename)
if not storage_dir:
continue
# Check if the file we're trying to download has already been downloaded. Skip if True.
uuid = url.split('/')[-1]
if (storage_dir / 'meta' / 'last_download').exists():
with open(storage_dir / 'meta' / 'last_download') as f:
last_download_uuid = f.read()
if last_download_uuid == uuid:
self.logger.debug(f'Already downloaded: {url}.')
continue
async with aiohttp.ClientSession() as s:
async with s.get(url) as r:
self.logger.info(f'Downloading {url}.')
content = await r.content.read()
with (storage_dir / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
f.write(content)
with open(storage_dir / 'meta' / 'last_download', 'w') as f:
f.write(uuid)
unset_running(f'{self.__class__.__name__}')
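A minimal driver sketch for this class, mirroring bin/ssfetcher.py below; credentials and paths are placeholders, and the backend (the Redis sockets behind set_running/unset_running) is assumed to be up:

#!/usr/bin/env python3
# Sketch: fetch the most recent day of Shadowserver reports.
import asyncio
from pathlib import Path

from bgpranking.shadowserverfetcher import ShadowServerFetcher

fetcher = ShadowServerFetcher('me@example.com', 'secret',
                              Path('bgpranking') / 'config' / 'modules',
                              Path('rawdata'))
loop = asyncio.get_event_loop()
# Downloads the last available day when no day is given.
loop.run_until_complete(fetcher.download_daily_entries())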


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from bgpranking.archive import DeepArchive


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging
@@ -21,20 +21,28 @@ class ModulesManager(AbstractManager):
         if not config_dir:
             config_dir = get_config_path()
         if not storage_directory:
-            storage_directory = get_homedir() / 'rawdata'
-        modules_config = config_dir / 'modules'
-        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
-        self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths]
+            self.storage_directory = get_homedir() / 'rawdata'
+        self.modules_config = config_dir / 'modules'
+        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
+        self.modules = [Fetcher(path, self.storage_directory, loglevel) for path in self.modules_paths]
     def _to_run_forever(self):
-        loop = asyncio.get_event_loop()
-        try:
-            loop.run_until_complete(asyncio.gather(
-                *[module.fetch_list() for module in self.modules if module.fetcher],
-                return_exceptions=True)
-            )
-        except aiohttp.client_exceptions.ClientConnectorError as e:
-            self.logger.critical(f'Exception while fetching lists: {e}')
+        # Check if there are new config files
+        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
+        self.modules += [Fetcher(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
+        self.modules_paths += new_modules_paths
+        if self.modules:
+            loop = asyncio.get_event_loop()
+            try:
+                loop.run_until_complete(asyncio.gather(
+                    *[module.fetch_list() for module in self.modules if module.fetcher],
+                    return_exceptions=True)
+                )
+            except aiohttp.client_exceptions.ClientConnectorError as e:
+                self.logger.critical(f'Exception while fetching lists: {e}')
+        else:
+            self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.')
 if __name__ == '__main__':


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from bgpranking.monitor import Monitor


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging
@@ -19,13 +19,21 @@ class ParserManager(AbstractManager):
         if not config_dir:
             config_dir = get_config_path()
         if not storage_directory:
-            storage_directory = get_homedir() / 'rawdata'
-        modules_config = config_dir / 'modules'
-        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
-        self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths]
+            self.storage_directory = get_homedir() / 'rawdata'
+        self.modules_config = config_dir / 'modules'
+        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
+        self.modules = [RawFilesParser(path, self.storage_directory, loglevel) for path in self.modules_paths]
     def _to_run_forever(self):
-        [module.parse_raw_files() for module in self.modules]
+        # Check if there are new config files
+        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
+        self.modules += [RawFilesParser(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
+        self.modules_paths += new_modules_paths
+        if self.modules:
+            [module.parse_raw_files() for module in self.modules]
+        else:
+            self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.')
 if __name__ == '__main__':


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from bgpranking.libs.helpers import get_homedir, check_running


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import logging


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from bgpranking.libs.helpers import is_running, get_socket_path

bin/ssfetcher.py

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import json
import asyncio
from pathlib import Path
import aiohttp
from bgpranking.abstractmanager import AbstractManager
from bgpranking.shadowserverfetcher import ShadowServerFetcher
from bgpranking.libs.helpers import get_config_path, get_homedir
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO, datefmt='%I:%M:%S')
class ShadowServerManager(AbstractManager):
def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
super().__init__(loglevel)
self.config = True
if not config_dir:
config_dir = get_config_path()
if not (config_dir / 'shadowserver.json').exists():
self.config = False
self.logger.warning(f'No config file available, the shadow server module will not be launched.')
return
with open(config_dir / 'shadowserver.json') as f:
ss_config = json.load(f)
if not storage_directory:
storage_directory = get_homedir() / 'rawdata'
modules_config = config_dir / 'modules'
self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], modules_config, storage_directory, loglevel)
def _to_run_forever(self):
loop = asyncio.get_event_loop()
try:
loop.run_until_complete(self.fetcher.download_daily_entries())
except aiohttp.client_exceptions.ClientConnectorError as e:
self.logger.critical(f'Exception while fetching Shadow Server lists: {e}')
if __name__ == '__main__':
modules_manager = ShadowServerManager()
if modules_manager.config:
modules_manager.run(sleep_in_sec=3600)


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from subprocess import Popen
@@ -13,6 +13,7 @@ if __name__ == '__main__':
     Popen(['loadprefixes.py'])
     Popen(['rislookup.py'])
     Popen(['fetcher.py'])
+    Popen(['ssfetcher.py'])
     Popen(['parser.py'])
     Popen(['sanitizer.py'])
     Popen(['dbinsert.py'])


@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from subprocess import Popen


@@ -13,7 +13,7 @@ setup(
     description='BGP Ranking, the new one..',
     packages=['bgpranking'],
     scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
-             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py',
+             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py',
              'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
     classifiers=[
         'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',