new: Add Shadowserver module

parent faa3c634f2
commit 6045635b72

@@ -110,3 +110,11 @@ rawdata
storage/ardb.pid
storage/data
storage/repl

# Shadow Server config file with password
bgpranking/config/shadowserver.json
# The shadow server config files are dynamically generated
bgpranking/config/modules/shadowserver_*.json

# Do not store the d3 lib in the repo
website/web/static/d3*.js

@@ -10,6 +10,7 @@ from .libs.helpers import long_sleep, shutdown_requested
class AbstractManager(ABC):

    def __init__(self, loglevel: int=logging.DEBUG):
        self.loglevel = loglevel
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)
        self.logger.info(f'Initializing {self.__class__.__name__}')
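
The one line added here keeps the loglevel on the instance, so the long-running managers can hand it to workers they create later. A minimal sketch of the subclass contract, assuming only what the bin/ scripts further down in this diff use (FooManager and its body are invented for illustration):

```python
# Minimal sketch of how a manager subclass uses AbstractManager; FooManager is
# hypothetical, but _to_run_forever() and run(sleep_in_sec=...) match the
# bin/ scripts later in this diff.
import logging

from bgpranking.abstractmanager import AbstractManager


class FooManager(AbstractManager):

    def __init__(self, loglevel: int=logging.INFO):
        super().__init__(loglevel)
        # self.loglevel is now stored by the base class, so workers created
        # later (see the fetcher/parser managers below) can reuse it.

    def _to_run_forever(self):
        self.logger.info('One unit of work per cycle.')


if __name__ == '__main__':
    FooManager().run(sleep_in_sec=3600)
```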

@@ -0,0 +1,6 @@
#!/bin/bash

set -e
set -x

find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete

@@ -0,0 +1,4 @@
{
  "user": "[USERNAME]",
  "password": "[PASSWORD]"
}
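
These two keys are read as-is by the Shadow Server manager. A minimal sketch of the load, assuming the file sits in bgpranking/config/ as the .gitignore entry above suggests:

```python
# Minimal sketch of how the credentials template is consumed (distilled from
# bin/ssfetcher.py further down; the config_dir path is an assumption).
import json
from pathlib import Path

config_dir = Path('bgpranking') / 'config'
with open(config_dir / 'shadowserver.json') as f:
    ss_config = json.load(f)

user, password = ss_config['user'], ss_config['password']
```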

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import aiohttp

@@ -112,10 +112,10 @@ class Fetcher():
            self.logger.debug('New list, no hisorical files')
            # nothing has been downloaded ever, moving on
            return False
        dl_hash = sha512(downloaded)
        for last_file in to_check:
            with last_file.open('rb') as f:
                last_hash = sha512(f.read())
                dl_hash = sha512(downloaded)
            if (dl_hash.digest() == last_hash.digest() and
                    parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                self.logger.debug('Same file already downloaded today.')
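
The change in this hunk hashes the freshly downloaded payload once, before the loop over historical files, instead of re-hashing it on every comparison. A distilled, standalone sketch of the check (to_check, downloaded and the date-prefixed filenames are assumptions taken from the surrounding code, not a verbatim copy of Fetcher):

```python
# Hedged sketch of the duplicate-download check.
from datetime import date
from hashlib import sha512
from pathlib import Path

from dateutil import parser


def same_as_last(downloaded: bytes, to_check: list) -> bool:
    if not to_check:
        # nothing has been downloaded ever, moving on
        return False
    dl_hash = sha512(downloaded)  # hash the new payload once, outside the loop
    for last_file in to_check:
        last_hash = sha512(Path(last_file).read_bytes())
        if (dl_hash.digest() == last_hash.digest()
                and parser.parse(Path(last_file).name.split('.')[0]).date() == date.today()):
            return True  # identical content already stored today
    return False
```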

@@ -11,7 +11,7 @@ from uuid import uuid4
from io import BytesIO
import importlib

from typing import List
from typing import List, Union, Tuple
import types

from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path

@@ -20,7 +20,7 @@ from .libs.helpers import safe_create_dir, set_running, unset_running, get_socke
class RawFilesParser():

    def __init__(self, config_file: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG):
                 loglevel: int=logging.DEBUG) -> None:
        with open(config_file, 'r') as f:
            module_parameters = json.load(f)
        self.vendor = module_parameters['vendor']

@@ -36,7 +36,7 @@ class RawFilesParser():
        self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0)
        self.logger.debug(f'Starting intake on {self.source}')

    def __init_logger(self, loglevel):
    def __init_logger(self, loglevel) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}-{self.vendor}-{self.listname}')
        self.logger.setLevel(loglevel)

@@ -52,12 +52,12 @@ class RawFilesParser():
        Only run it when needed, it is nasty and slow'''
        return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]

    def parse_raw_file(self, f: BytesIO):
    def parse_raw_file(self, f: BytesIO) -> List[bytes]:
        # If the list doesn't provide a time, fallback to current day, midnight
        self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        return self.extract_ipv4(f.getvalue())

    def parse_raw_files(self):
    def parse_raw_files(self) -> None:
        set_running(f'{self.__class__.__name__}-{self.source}')
        nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()])
        if nb_unparsable_files:
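
The list comprehension above is what later parsers (the dshield one further down refers to it) rely on to strip leading zeros from dotted quads; a standalone illustration:

```python
# What the leading-zero cleanup above does, on made-up input:
ips = [b'010.001.002.003', b'192.168.001.010']
cleaned = ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]
print(cleaned)  # [b'10.1.2.3', b'192.168.1.10']
```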

@@ -69,9 +69,13 @@ class RawFilesParser():
                to_parse = BytesIO(f.read())
            p = self.redis_intake.pipeline()
            for ip in self.parse_raw_file(to_parse):
                if isinstance(ip, tuple):
                    ip, datetime = ip
                else:
                    datetime = self.datetime
                uuid = uuid4()
                p.hmset(uuid, {'ip': ip, 'source': self.source,
                               'datetime': self.datetime.isoformat()})
                               'datetime': datetime.isoformat()})
                p.sadd('intake', uuid)
            p.execute()
            self._archive(filepath)
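
With this hunk, parse_raw_file may either return a flat list of IPs (the classic parsers) or yield (ip, timestamp) tuples (the new shadowserver parser), and the intake loop stores the per-entry timestamp when one is provided. A small sketch of the two shapes the loop now accepts (feed contents invented):

```python
# Sketch of the two shapes parse_raw_file can now hand to the intake loop.
from datetime import datetime

list_style = [b'198.51.100.7', b'203.0.113.9']                   # classic parsers
tuple_style = [('198.51.100.7', datetime(2018, 3, 22, 14, 0))]   # shadowserver parser

default_dt = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

for entry in list_style + tuple_style:
    if isinstance(entry, tuple):
        ip, dt = entry               # per-entry timestamp from the feed itself
    else:
        ip, dt = entry, default_dt   # fall back to the parser-wide timestamp
    print(ip, dt.isoformat())
```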

@@ -81,10 +85,10 @@ class RawFilesParser():
        finally:
            unset_running(f'{self.__class__.__name__}-{self.source}')

    def _archive(self, filepath: Path):
    def _archive(self, filepath: Path) -> None:
        '''After processing, move file to the archive directory'''
        filepath.rename(self.directory / 'archive' / filepath.name)

    def _unparsable(self, filepath: Path):
    def _unparsable(self, filepath: Path) -> None:
        '''After processing, move file to the unparsable directory'''
        filepath.rename(self.unparsable_dir / filepath.name)

@@ -1,11 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# Generated on (.*)#\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())
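
These small per-feed parsers all follow the same pattern: pull the generation date out of a header line with a bytes regex, then hand the body to extract_ipv4. An isolated illustration of the date extraction (the header line is made up to match the regex above):

```python
# Standalone illustration of the header-date extraction used by these parsers.
import re
from dateutil.parser import parse

raw = b'# Generated on 2018-03-22 14:00:00#\n198.51.100.7\n203.0.113.9\n'
match = re.findall(b'# Generated on (.*)#\n', raw)[0]
ts = parse(match.decode())  # decoded for clarity; recent dateutil also accepts bytes
print(ts.isoformat())       # 2018-03-22T14:00:00
```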

@@ -1,11 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'## Feed generated at: (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())

@@ -1,14 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from io import BytesIO
from datetime import datetime

from ..simple_feed_fetcher import RawFileImporter


class DefaultImporter(RawFileImporter):

    def parse_raw_file(self, f: BytesIO):
        self.datetime = datetime.now()
        return self.extract_ipv4(f.getvalue())

@@ -4,9 +4,10 @@
from dateutil.parser import parse
import re
from io import BytesIO
from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0])
    iplist = self.extract_ipv4(f.getvalue())
    # The IPs have leading 0s. Getting rid of them directly here.

@@ -1,11 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())

@@ -1,11 +1,12 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List

def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
from csv import DictReader
from io import BytesIO, StringIO
from typing import Tuple, Generator
from datetime import datetime


def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]:
    default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    reader = DictReader(StringIO(f.getvalue().decode()))
    for row in reader:
        if 'timestamp' in row:
            ts = parse(row['timestamp'])
        else:
            ts = default_ts

        if 'ip' in row:
            ip = row['ip']
        elif 'src_ip' in row:
            # For sinkhole6_http
            ip = row['src_ip']
        else:
            self.logger.critical(f'No IPs in the list {self.source}.')
            break
        yield ip, ts
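
This parser is selected through the 'parser': '.parsers.shadowserver' entry written by the fetcher's config generator below, and, judging by the importlib/types imports added earlier, bound onto the parser instance at runtime. A rough usage sketch; the stub object, import path and CSV rows are invented purely to show the (ip, timestamp) generator contract:

```python
# Rough usage sketch; the import path and sample CSV are assumptions.
import logging
import types
from io import BytesIO

from bgpranking.parsers.shadowserver import parse_raw_file  # assumed module path


class _Stub:
    source = 'shadowserver-demo'
    logger = logging.getLogger('demo')


stub = _Stub()
stub.parse_raw_file = types.MethodType(parse_raw_file, stub)

sample = b'timestamp,ip\n2018-03-22 00:00:00,198.51.100.7\n2018-03-22 01:00:00,203.0.113.9\n'
for ip, ts in stub.parse_raw_file(BytesIO(sample)):
    print(ip, ts.isoformat())
```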

@@ -4,7 +4,7 @@
import logging
from redis import StrictRedis
from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files
from datetime import date
from datetime import datetime, date, timedelta
from ipaddress import ip_network
from pathlib import Path

@@ -22,40 +22,34 @@ class Ranking():
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def compute(self):
        self.logger.info('Start ranking')
        set_running(self.__class__.__name__)
        today = date.today().isoformat()
    def rank_a_day(self, day: str):
        # FIXME: If we want to rank an older date, we need to have older datasets for the announces
        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
        if not v4_last or not v6_last:
            '''Failsafe if asn_meta has not been populated yet'''
            unset_running(self.__class__.__name__)
            return
        asns_aggregation_key_v4 = f'{today}|asns|v4'
        asns_aggregation_key_v6 = f'{today}|asns|v6'
        asns_aggregation_key_v4 = f'{day}|asns|v4'
        asns_aggregation_key_v6 = f'{day}|asns|v6'
        to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6])
        r_pipeline = self.ranking.pipeline()
        for source in self.storage.smembers(f'{today}|sources'):
            self.logger.info(f'{today} - Ranking source: {source}')
            source_aggregation_key_v4 = f'{today}|{source}|asns|v4'
            source_aggregation_key_v6 = f'{today}|{source}|asns|v6'
        for source in self.storage.smembers(f'{day}|sources'):
            self.logger.info(f'{day} - Ranking source: {source}')
            source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
            source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
            to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
            for asn in self.storage.smembers(f'{today}|{source}'):
                prefixes_aggregation_key_v4 = f'{today}|{asn}|v4'
                prefixes_aggregation_key_v6 = f'{today}|{asn}|v6'
            for asn in self.storage.smembers(f'{day}|{source}'):
                prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
                prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
                to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
                if asn == '0':
                    # Default ASN when no matches. Probably spoofed.
                    continue
                self.logger.debug(f'{today} - Ranking source: {source} / ASN: {asn}')
                self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
                asn_rank_v4 = 0.0
                asn_rank_v6 = 0.0
                for prefix in self.storage.smembers(f'{today}|{source}|{asn}'):
                for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
                    ips = set([ip_ts.split('|')[0]
                               for ip_ts in self.storage.smembers(f'{today}|{source}|{asn}|{prefix}')])
                               for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')])
                    py_prefix = ip_network(prefix)
                    prefix_rank = float(len(ips)) / py_prefix.num_addresses
                    r_pipeline.zadd(f'{today}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                    r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                    if py_prefix.version == 4:
                        asn_rank_v4 += len(ips) * self.config_files[source]['impact']
                        r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix, prefix_rank * self.config_files[source]['impact'])

@@ -66,17 +60,32 @@ class Ranking():
                if v4count:
                    asn_rank_v4 /= float(v4count)
                    if asn_rank_v4:
                        r_pipeline.set(f'{today}|{source}|{asn}|v4', asn_rank_v4)
                        r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
                        r_pipeline.zincrby(asns_aggregation_key_v4, asn, asn_rank_v4)
                        r_pipeline.zadd(source_aggregation_key_v4, asn_rank_v4, asn)
                if v6count:
                    asn_rank_v6 /= float(v6count)
                    if asn_rank_v6:
                        r_pipeline.set(f'{today}|{source}|{asn}|v6', asn_rank_v6)
                        r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
                        r_pipeline.zincrby(asns_aggregation_key_v6, asn, asn_rank_v6)
                        r_pipeline.zadd(source_aggregation_key_v6, asn_rank_v4, asn)
        self.ranking.delete(*to_delete)
        r_pipeline.execute()

    def compute(self):
        self.logger.info('Start ranking')
        set_running(self.__class__.__name__)
        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
        if not v4_last or not v6_last:
            '''Failsafe if asn_meta has not been populated yet'''
            unset_running(self.__class__.__name__)
            return
        today = date.today()
        now = datetime.now()
        today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
        if now < today12am:
            # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
            self.rank_a_day((today - timedelta(days=1)).isoformat())
        self.rank_a_day(today.isoformat())
        unset_running(self.__class__.__name__)
        self.logger.info('Ranking done.')
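
The heart of the ranking is unchanged by this hunk: a prefix's rank is the fraction of its address space seen in a feed, and the ASN aggregate is weighted by the feed's configured impact. A tiny worked example with invented numbers:

```python
# Worked example of the prefix_rank computation (all numbers invented).
from ipaddress import ip_network

ips_seen = {'198.51.100.7', '198.51.100.9', '198.51.100.200'}  # unique IPs from one source
impact = 5                                                     # e.g. a 'blacklist' feed

py_prefix = ip_network('198.51.100.0/24')
prefix_rank = float(len(ips_seen)) / py_prefix.num_addresses   # 3 / 256 ≈ 0.0117
weighted = prefix_rank * impact                                # contribution to the aggregates
print(py_prefix.version, prefix_rank, weighted)
```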

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import aiohttp
import logging
from bs4 import BeautifulSoup
from dateutil.parser import parse
from typing import Tuple
from datetime import datetime, date
from pathlib import Path
from .libs.helpers import safe_create_dir, set_running, unset_running
import json


class ShadowServerFetcher():

    def __init__(self, user, password, config_path_modules: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG) -> None:
        self.__init_logger(loglevel)
        self.storage_directory = storage_directory
        self.config_path_modules = config_path_modules
        self.user = user
        self.password = password
        self.index_page = 'https://dl.shadowserver.org/reports/index.php'
        self.vendor = 'shadowserver'
        self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone',
                                 'microsoft', 'scan', 'sinkhole6', 'sinkhole')
        self.first_available_day = None
        self.last_available_day = None
        self.available_entries = {}

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    async def __get_index(self):
        auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
        async with aiohttp.ClientSession() as s:
            self.logger.debug('Fetching the index.')
            async with s.post(self.index_page, data=auth_details) as r:
                return await r.text()

    async def __build_daily_dict(self):
        html_index = await self.__get_index()
        soup = BeautifulSoup(html_index, 'html.parser')
        treeview = soup.find(id='treemenu1')
        for y in treeview.select('> li'):
            year = y.contents[0]
            for m in y.contents[1].select('> li'):
                month = m.contents[0]
                for d in m.contents[1].select('> li'):
                    day = d.contents[0]
                    date = parse(f'{year} {month} {day}').date()
                    self.available_entries[date.isoformat()] = []
                    for a in d.contents[1].find_all('a', href=True):
                        if not self.first_available_day:
                            self.first_available_day = date
                        self.last_available_day = date
                        self.available_entries[date.isoformat()].append((a['href'], a.string))
        self.logger.debug('Dictionary created.')

    def __normalize_day(self, day: Tuple[str, date, datetime]=None) -> str:
        if not day:
            if not self.last_available_day:
                raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
            day = self.last_available_day
        else:
            if isinstance(day, str):
                day = parse(day).date()
            elif isinstance(day, datetime):
                day = day.date()
        return day.isoformat()

    def __split_name(self, name):
        type_content, country, list_type = name.split('-')
        if '_' in type_content:
            type_content, details_type = type_content.split('_', maxsplit=1)
            if '_' in details_type:
                details_type, sub = details_type.split('_')
                return list_type, country, (type_content, details_type, sub)
            return list_type, country, (type_content, details_type)
        return list_type, country, (type_content)

    def __check_config(self, filename: str) -> Path:
        self.logger.debug(f'Working on config for {filename}.')
        config = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
        type_content, _, type_details = self.__split_name(filename)
        prefix = type_content.split('.')[0]
        config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))

        main_type = type_details[0]
        if main_type not in self.known_list_types:
            self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
            return None

        if main_type == 'blacklist':
            config['impact'] = 5
        elif main_type == 'botnet':
            config['impact'] = 2
        elif main_type == 'cc':
            config['impact'] = 5
        elif main_type == 'cisco':
            config['impact'] = 3
        elif main_type == 'cwsandbox':
            config['impact'] = 5
        elif main_type == 'drone':
            config['impact'] = 2
        elif main_type == 'microsoft':
            config['impact'] = 3
        elif main_type == 'scan':
            config['impact'] = 1
        elif main_type == 'sinkhole6':
            config['impact'] = 2
        elif main_type == 'sinkhole':
            config['impact'] = 2
        if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists():
            self.logger.debug(f'Creating config file for {filename}.')
            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f:
                json.dump(config, f, indent=2)
        else:
            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f:
                # Validate new config file with old
                config_current = json.load(f)
                if config_current != config:
                    self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))
        # Init list directory
        directory = self.storage_directory / config['vendor'] / config['name']
        safe_create_dir(directory)
        meta = directory / 'meta'
        safe_create_dir(meta)
        archive_dir = directory / 'archive'
        safe_create_dir(archive_dir)
        self.logger.debug(f'Done with config for {filename}.')
        return directory

    async def download_daily_entries(self, day: Tuple[str, date, datetime]=None):
        set_running(f'{self.__class__.__name__}')
        await self.__build_daily_dict()
        for url, filename in self.available_entries[self.__normalize_day(day)]:
            storage_dir = self.__check_config(filename)
            if not storage_dir:
                continue
            # Check if the file we're trying to download has already been downloaded. Skip if True.
            uuid = url.split('/')[-1]
            if (storage_dir / 'meta' / 'last_download').exists():
                with open(storage_dir / 'meta' / 'last_download') as f:
                    last_download_uuid = f.read()
                if last_download_uuid == uuid:
                    self.logger.debug(f'Already downloaded: {url}.')
                    continue
            async with aiohttp.ClientSession() as s:
                async with s.get(url) as r:
                    self.logger.info(f'Downloading {url}.')
                    content = await r.content.read()
                    with (storage_dir / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
                        f.write(content)
                    with open(storage_dir / 'meta' / 'last_download', 'w') as f:
                        f.write(uuid)
        unset_running(f'{self.__class__.__name__}')
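
The if/elif ladder assigning impact weights per report type boils down to a lookup table; the same mapping as a sketch (weights copied from the code above, the dict form is only an alternative presentation, not what the module actually does):

```python
# Same type-to-impact mapping as the ladder above, as a plain lookup table.
IMPACTS = {
    'blacklist': 5, 'cc': 5, 'cwsandbox': 5,
    'cisco': 3, 'microsoft': 3,
    'botnet': 2, 'drone': 2, 'sinkhole': 2, 'sinkhole6': 2,
    'scan': 1,
}


def impact_for(main_type: str) -> int:
    # Unknown types never reach this point: known_list_types is checked first.
    return IMPACTS[main_type]
```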

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.archive import DeepArchive

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -21,20 +21,28 @@ class ModulesManager(AbstractManager):
        if not config_dir:
            config_dir = get_config_path()
        if not storage_directory:
            storage_directory = get_homedir() / 'rawdata'
        modules_config = config_dir / 'modules'
        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
        self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths]
        self.storage_directory = get_homedir() / 'rawdata'
        self.modules_config = config_dir / 'modules'
        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
        self.modules = [Fetcher(path, self.storage_directory, loglevel) for path in self.modules_paths]

    def _to_run_forever(self):
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.gather(
                *[module.fetch_list() for module in self.modules if module.fetcher],
                return_exceptions=True)
            )
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.logger.critical(f'Exception while fetching lists: {e}')
        # Check if there are new config files
        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
        self.modules += [Fetcher(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
        self.modules_paths += new_modules_paths

        if self.modules:
            loop = asyncio.get_event_loop()
            try:
                loop.run_until_complete(asyncio.gather(
                    *[module.fetch_list() for module in self.modules if module.fetcher],
                    return_exceptions=True)
                )
            except aiohttp.client_exceptions.ClientConnectorError as e:
                self.logger.critical(f'Exception while fetching lists: {e}')
        else:
            self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.')


if __name__ == '__main__':
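
Both the fetcher manager above and the parser manager further down now rescan the modules config directory on every cycle instead of only at startup, which is what lets the dynamically generated shadowserver_*.json configs get picked up without a restart. The pattern, distilled into a standalone sketch (make_worker stands in for Fetcher / RawFilesParser):

```python
# Distilled sketch of the "pick up new module configs on every cycle" pattern.
from pathlib import Path


def refresh_workers(modules_config: Path, known_paths: list, workers: list, make_worker):
    new_paths = [p for p in modules_config.glob('*.json') if p not in known_paths]
    workers += [make_worker(p) for p in new_paths]
    known_paths += new_paths
    return known_paths, workers
```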

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.monitor import Monitor

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -19,13 +19,21 @@ class ParserManager(AbstractManager):
        if not config_dir:
            config_dir = get_config_path()
        if not storage_directory:
            storage_directory = get_homedir() / 'rawdata'
        modules_config = config_dir / 'modules'
        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
        self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths]
        self.storage_directory = get_homedir() / 'rawdata'
        self.modules_config = config_dir / 'modules'
        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
        self.modules = [RawFilesParser(path, self.storage_directory, loglevel) for path in self.modules_paths]

    def _to_run_forever(self):
        [module.parse_raw_files() for module in self.modules]
        # Check if there are new config files
        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
        self.modules += [RawFilesParser(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
        self.modules_paths += new_modules_paths

        if self.modules:
            [module.parse_raw_files() for module in self.modules]
        else:
            self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.')


if __name__ == '__main__':

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.libs.helpers import get_homedir, check_running

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.libs.helpers import is_running, get_socket_path

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import json
import asyncio
from pathlib import Path
import aiohttp

from bgpranking.abstractmanager import AbstractManager
from bgpranking.shadowserverfetcher import ShadowServerFetcher
from bgpranking.libs.helpers import get_config_path, get_homedir

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO, datefmt='%I:%M:%S')


class ShadowServerManager(AbstractManager):

    def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
        super().__init__(loglevel)
        self.config = True
        if not config_dir:
            config_dir = get_config_path()
        if not (config_dir / 'shadowserver.json').exists():
            self.config = False
            self.logger.warning(f'No config file available, the shadow server module will not be launched.')
            return
        with open(config_dir / 'shadowserver.json') as f:
            ss_config = json.load(f)
        if not storage_directory:
            storage_directory = get_homedir() / 'rawdata'
        modules_config = config_dir / 'modules'
        self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], modules_config, storage_directory, loglevel)

    def _to_run_forever(self):
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(self.fetcher.download_daily_entries())
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.logger.critical(f'Exception while fetching Shadow Server lists: {e}')


if __name__ == '__main__':
    modules_manager = ShadowServerManager()
    if modules_manager.config:
        modules_manager.run(sleep_in_sec=3600)

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from subprocess import Popen

@@ -13,6 +13,7 @@ if __name__ == '__main__':
    Popen(['loadprefixes.py'])
    Popen(['rislookup.py'])
    Popen(['fetcher.py'])
    Popen(['ssfetcher.py'])
    Popen(['parser.py'])
    Popen(['sanitizer.py'])
    Popen(['dbinsert.py'])

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from subprocess import Popen

setup.py

@@ -13,7 +13,7 @@ setup(
    description='BGP Ranking, the new one..',
    packages=['bgpranking'],
    scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py',
             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py',
             'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
    classifiers=[
        'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',