new: major refactoring
parent
3544dcb7e4
commit
adf2f1e157
30
archive.py
30
archive.py
|
@ -1,30 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from listimport.archive import DeepArchive
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class ModulesArchiver():
    """Archive the data of every configured module.

    Loads every JSON module config from `config_dir` and creates one
    DeepArchive per config, storing the archives under `storage_directory`.
    """

    def __init__(self, config_dir: Path=Path('listimport', 'modules_config'),
                 storage_directory: Path=Path('rawdata'),
                 loglevel: int=logging.INFO):
        self.config_dir = config_dir
        self.storage_directory = storage_directory
        self.loglevel = loglevel
        # One DeepArchive instance per module configuration file.
        self.modules_paths = list(self.config_dir.glob('*.json'))
        self.modules = [DeepArchive(path, self.storage_directory, self.loglevel)
                        for path in self.modules_paths]

    def archive(self):
        """Run the archiving step of every configured module."""
        # Plain loop: archive() is called for its side effects only,
        # the original built a throwaway list of None.
        for module in self.modules:
            module.archive()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Archive every configured module in one shot.
    ModulesArchiver().archive()
|
|
@ -10,7 +10,7 @@ import zipfile
|
|||
import logging
|
||||
import json
|
||||
|
||||
from .libs.helpers import safe_create_dir
|
||||
from .libs.helpers import safe_create_dir, set_running, unset_running
|
||||
|
||||
|
||||
class DeepArchive():
|
||||
|
@ -34,6 +34,8 @@ class DeepArchive():
|
|||
self.logger.setLevel(loglevel)
|
||||
|
||||
def archive(self):
|
||||
set_running(self.__class__.__name__)
|
||||
|
||||
to_archive = defaultdict(list)
|
||||
today = date.today()
|
||||
last_day_to_keep = date(today.year, today.month, 1) - relativedelta(months=2)
|
||||
|
@ -54,3 +56,4 @@ class DeepArchive():
|
|||
z.write(f, f.name)
|
||||
# Delete all the files if the archiving worked out properly
|
||||
[f.unlink() for f in path_list]
|
||||
unset_running(self.__class__.__name__)
|
|
@ -4,6 +4,7 @@
|
|||
import logging
|
||||
from redis import Redis
|
||||
from redis import StrictRedis
|
||||
from .libs.helpers import shutdown_requested, set_running, unset_running
|
||||
|
||||
|
||||
class DatabaseInsert():
|
||||
|
@ -19,8 +20,11 @@ class DatabaseInsert():
|
|||
self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
|
||||
self.logger.setLevel(loglevel)
|
||||
|
||||
async def insert(self):
|
||||
def insert(self):
|
||||
set_running(self.__class__.__name__)
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
uuid = self.redis_sanitized.spop('to_insert')
|
||||
if not uuid:
|
||||
break
|
||||
|
@ -32,7 +36,6 @@ class DatabaseInsert():
|
|||
ris_entry = self.ris_cache.hgetall(data['ip'])
|
||||
if not ris_entry:
|
||||
# RIS data not available yet, retry later
|
||||
# FIXME: an IP can sometimes not be announced, we need to discard it
|
||||
self.redis_sanitized.sadd('to_insert', uuid)
|
||||
# In case this IP is missing in the set to process
|
||||
self.ris_cache.sadd('for_ris_lookup', data['ip'])
|
||||
|
@ -53,3 +56,4 @@ class DatabaseInsert():
|
|||
ris_entry['prefix']),
|
||||
'{}|{}'.format(data['ip'], data['datetime']))
|
||||
self.redis_sanitized.delete(uuid)
|
||||
unset_running(self.__class__.__name__)
|
|
@ -0,0 +1,427 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import requests
|
||||
import os
|
||||
from dateutil import parser
|
||||
from datetime import datetime, date
|
||||
from hashlib import sha512 # Faster than sha256 on 64b machines.
|
||||
from pathlib import Path
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from collections import defaultdict
|
||||
import zipfile
|
||||
import logging
|
||||
import asyncio
|
||||
from pid import PidFile, PidFileError
|
||||
import json
|
||||
import re
|
||||
from redis import Redis
|
||||
from redis import StrictRedis
|
||||
from uuid import uuid4
|
||||
from io import BytesIO
|
||||
import importlib
|
||||
|
||||
from typing import List
|
||||
import types
|
||||
import ipaddress
|
||||
|
||||
|
||||
class BGPRankingException(Exception):
    """Base class for every error raised by the BGP Ranking package."""
    pass
|
||||
|
||||
|
||||
class FetcherException(BGPRankingException):
    """Raised when fetching a remote list fails."""
    pass
|
||||
|
||||
|
||||
class ArchiveException(BGPRankingException):
    """Raised when archiving imported files fails."""
    pass
|
||||
|
||||
|
||||
class CreateDirectoryException(BGPRankingException):
    """Raised when a storage path exists but is not a directory."""
    pass
|
||||
|
||||
|
||||
"""
|
||||
Directory structure:
|
||||
storage_directory / vendor / listname -> files to import
|
||||
storage_directory / vendor / listname / meta -> last modified & pid
|
||||
storage_directory / vendor / listname / archive -> imported files <= 2 month old
|
||||
storage_directory / vendor / listname / archive / deep -> imported files > 2 month old (zipped)
|
||||
"""
|
||||
|
||||
|
||||
def safe_create_dir(to_create: Path):
    """Ensure `to_create` exists as a directory, creating parents as needed.

    Raises CreateDirectoryException when the path exists but is not a
    directory (e.g. a regular file).
    """
    if to_create.exists():
        if not to_create.is_dir():
            raise CreateDirectoryException('The path {} already exists and is not a directory'.format(to_create))
        return  # Already a directory, nothing to do.
    os.makedirs(to_create, exist_ok=True)
|
||||
|
||||
|
||||
class Fetcher():
    """Download a remote list described by a JSON module configuration.

    The config must provide 'vendor' and 'name'. When it has no 'url',
    the fetcher disables itself (the data arrives by some other mean).
    """

    def __init__(self, config_file: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG):
        '''Load `config_file`, and store the fetched data into `storage_directory`
        Note: if the `config_file` does not provide a URL (the file is
        gathered by some other mean), the fetcher is automatically stopped.'''
        with open(config_file, 'r') as f:
            module_parameters = json.load(f)
        self.vendor = module_parameters['vendor']
        self.listname = module_parameters['name']
        self.__init_logger(loglevel)
        self.fetcher = True
        if 'url' not in module_parameters:
            # Nothing to fetch over HTTP: disable this fetcher.
            self.logger.info('No URL to fetch, breaking.')
            self.fetcher = False
            return
        self.url = module_parameters['url']
        self.logger.debug('Starting fetcher on {}'.format(self.url))
        self.directory = storage_directory / self.vendor / self.listname
        safe_create_dir(self.directory)
        self.meta = self.directory / 'meta'
        safe_create_dir(self.meta)
        self.archive_dir = self.directory / 'archive'
        safe_create_dir(self.archive_dir)
        # First run: we may have to bootstrap the Last-Modified marker file.
        self.first_fetch = True

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}-{}-{}'.format(self.__class__.__name__,
                                                          self.vendor, self.listname))
        self.logger.setLevel(loglevel)

    def __get_last_modified(self):
        """Return the parsed Last-Modified header of `self.url`, or None."""
        r = requests.head(self.url)
        if 'Last-Modified' in r.headers:
            return parser.parse(r.headers['Last-Modified'])
        return None

    def __newer(self):
        '''Check if the file available for download is newer than the one
        already downloaded by checking the `Last-Modified` header.
        Note: return False if the file containing the last header content
        is not existing, or the header doesn't have this key.
        '''
        last_modified_path = self.meta / 'lastmodified'
        if not last_modified_path.exists():
            # The file doesn't exists
            if not self.first_fetch:
                # The URL has no Last-Modified header, we cannot use it.
                self.logger.debug('No Last-Modified header available')
                return True
            self.first_fetch = False
            last_modified = self.__get_last_modified()
            if last_modified:
                self.logger.debug('Last-Modified header available')
                with last_modified_path.open('w') as f:
                    f.write(last_modified.isoformat())
            else:
                self.logger.debug('No Last-Modified header available')
            return True
        with last_modified_path.open() as f:
            last_modified_file = parser.parse(f.read())
        last_modified = self.__get_last_modified()
        if not last_modified:
            # No more Last-Modified header Oo
            self.logger.warning('{}: Last-Modified header was present, isn\'t anymore!'.format(self.listname))
            last_modified_path.unlink()
            return True
        if last_modified > last_modified_file:
            self.logger.info('Got a new file.')
            with last_modified_path.open('w') as f:
                f.write(last_modified.isoformat())
            return True
        return False

    def __same_as_last(self, downloaded):
        '''Figure out the last downloaded file, check if it is the same as the
        newly downloaded one. Returns true if both files have been downloaded the
        same day.
        Note: we check the new and the archive directory because we may have backlog
        and the newest file is always the first one we process
        '''
        to_check = []
        to_check_new = sorted([f for f in self.directory.iterdir() if f.is_file()])
        if to_check_new:
            # we have files waiting to be processed
            self.logger.debug('{} file(s) are waiting to be processed'.format(len(to_check_new)))
            to_check.append(to_check_new[-1])
        to_check_archive = sorted([f for f in self.archive_dir.iterdir() if f.is_file()])
        if to_check_archive:
            # we have files already processed, in the archive
            self.logger.debug('{} file(s) have been processed'.format(len(to_check_archive)))
            to_check.append(to_check_archive[-1])
        if not to_check:
            self.logger.debug('New list, no hisorical files')
            # nothing has been downloaded ever, moving on
            return False
        for last_file in to_check:
            with last_file.open('rb') as f:
                last_hash = sha512(f.read())
            dl_hash = sha512(downloaded)
            if (dl_hash.digest() == last_hash.digest() and
                    parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                self.logger.debug('Same file already downloaded today.')
                return True
        return False

    async def fetch_list(self):
        '''Fetch & store the list'''
        # NOTE: the original `@asyncio.coroutine` decorator was dropped: it is
        # redundant on an `async def` and was removed in Python 3.11.
        if not self.fetcher:
            return
        try:
            # The pid file guards against two concurrent fetches of this list.
            with PidFile('{}.pid'.format(self.listname), piddir=self.meta):
                if not self.__newer():
                    return
                r = requests.get(self.url)
                if self.__same_as_last(r.content):
                    return
                self.logger.info('Got a new file \o/')
                with (self.directory / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
                    f.write(r.content)
        except PidFileError:
            self.logger.info('Fetcher already running')
|
||||
|
||||
|
||||
# get announcer: https://stat.ripe.net/data/network-info/data.json?resource=149.13.33.14
|
||||
|
||||
class RawFilesParser():
    """Parse raw downloaded files and push each IP into the intake redis."""

    def __init__(self, config_file: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG):
        with open(config_file, 'r') as f:
            module_parameters = json.load(f)
        self.vendor = module_parameters['vendor']
        self.listname = module_parameters['name']
        if 'parser' in module_parameters:
            # A module may ship a custom parser; it replaces the default
            # parse_raw_file below, bound to this instance.
            self.parse_raw_file = types.MethodType(importlib.import_module(module_parameters['parser']).parse_raw_file, self)
        self.source = '{}-{}'.format(self.vendor, self.listname)
        self.directory = storage_directory / self.vendor / self.listname
        safe_create_dir(self.directory)
        self.__init_logger(loglevel)
        self.redis_intake = Redis(host='localhost', port=6379, db=0)
        self.logger.debug('Starting intake on {}'.format(self.source))

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}-{}-{}'.format(self.__class__.__name__,
                                                          self.vendor, self.listname))
        self.logger.setLevel(loglevel)

    @property
    def files_to_parse(self) -> List[Path]:
        """Pending files to process, newest first."""
        return sorted([f for f in self.directory.iterdir() if f.is_file()], reverse=True)

    def extract_ipv4(self, bytestream: bytes) -> List[bytes]:
        """Return every dotted-quad IPv4-looking token in `bytestream`."""
        return re.findall(rb'[0-9]+(?:\.[0-9]+){3}', bytestream)

    def parse_raw_file(self, f: BytesIO):
        """Default parser: record the parse time and extract IPv4 addresses."""
        self.datetime = datetime.now()
        return self.extract_ipv4(f.getvalue())

    async def parse_raw_files(self):
        """Parse every pending file, queue its IPs, then archive the file.

        NOTE: the original `@asyncio.coroutine` decorator was dropped: it is
        redundant on an `async def` and was removed in Python 3.11.
        """
        for filepath in self.files_to_parse:
            self.logger.debug('Parsing {}, {} to go.'.format(filepath, len(self.files_to_parse) - 1))
            with open(filepath, 'rb') as f:
                to_parse = BytesIO(f.read())
            p = self.redis_intake.pipeline()
            for ip in self.parse_raw_file(to_parse):
                uuid = uuid4()
                p.hmset(uuid, {'ip': ip, 'source': self.source,
                               'datetime': self.datetime.isoformat()})
                p.sadd('intake', uuid)
            p.execute()
            self._archive(filepath)

    def _archive(self, filepath: Path):
        '''After processing, move file to the archive directory'''
        filepath.rename(self.directory / 'archive' / filepath.name)
|
||||
|
||||
|
||||
class Sanitizer():
    """Validate raw intake entries and queue them for insertion.

    Pops entries from the intake redis, validates the IP address, and
    pushes normalised entries to the sanitized redis and the RIS lookup
    queue.
    """

    def __init__(self, loglevel: int=logging.DEBUG):
        self.__init_logger(loglevel)
        self.redis_intake = Redis(host='localhost', port=6379, db=0, decode_responses=True)
        self.redis_sanitized = Redis(host='localhost', port=6380, db=0, decode_responses=True)
        self.ris_cache = Redis(host='localhost', port=6381, db=0, decode_responses=True)
        self.logger.debug('Starting import')

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
        self.logger.setLevel(loglevel)

    async def sanitize(self):
        """Drain the 'intake' set, validating and normalising each entry."""
        while True:
            uuid = self.redis_intake.spop('intake')
            if not uuid:
                break
            data = self.redis_intake.hgetall(uuid)
            try:
                ip = ipaddress.ip_address(data['ip'])
            except ValueError:
                self.logger.info('Invalid IP address: {}'.format(data['ip']))
                continue
            if not ip.is_global:
                # Private/reserved addresses are of no interest for ranking.
                self.logger.info('The IP address {} is not global'.format(data['ip']))
                continue

            # Renamed from `date` to avoid shadowing the module-level
            # `datetime.date` import.
            date_str = parser.parse(data['datetime']).date().isoformat()
            # NOTE: to consider: discard data with an old timestamp (define old)

            # Add to temporary DB for further processing
            self.ris_cache.sadd('for_ris_lookup', str(ip))
            pipeline = self.redis_sanitized.pipeline()
            pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'],
                                  'date': date_str, 'datetime': data['datetime']})
            pipeline.sadd('to_insert', uuid)
            pipeline.execute()
            self.redis_intake.delete(uuid)
|
||||
|
||||
|
||||
class DatabaseInsert():
    """Move sanitized entries into the long-term ARDB storage.

    Entries wait in the sanitized redis until their RIS lookup data is
    available, then get fanned out into the date/source/asn/prefix key
    hierarchy described inline below.
    """

    def __init__(self, loglevel: int=logging.DEBUG):
        self.__init_logger(loglevel)
        self.ardb_storage = StrictRedis(host='localhost', port=16379, decode_responses=True)
        self.redis_sanitized = Redis(host='localhost', port=6380, db=0, decode_responses=True)
        self.ris_cache = Redis(host='localhost', port=6381, db=0, decode_responses=True)
        self.logger.debug('Starting import')

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
        self.logger.setLevel(loglevel)

    async def insert(self):
        """Drain 'to_insert', storing each entry in ARDB once RIS data exists."""
        while True:
            uuid = self.redis_sanitized.spop('to_insert')
            if not uuid:
                break
            data = self.redis_sanitized.hgetall(uuid)
            # Data gathered from the RIS queries:
            # * IP Block of the IP -> https://stat.ripe.net/docs/data_api#NetworkInfo
            # * AS number -> https://stat.ripe.net/docs/data_api#NetworkInfo
            # * Full text description of the AS (older name) -> https://stat.ripe.net/docs/data_api#AsOverview
            ris_entry = self.ris_cache.hgetall(data['ip'])
            if not ris_entry:
                # RIS data not available yet, retry later
                # FIXME: an IP can sometimes not be announced, we need to discard it
                self.redis_sanitized.sadd('to_insert', uuid)
                # In case this IP is missing in the set to process
                self.ris_cache.sadd('for_ris_lookup', data['ip'])
                continue
            # Format: <YYYY-MM-DD>|sources -> set([<source>, ...])
            self.ardb_storage.sadd('{}|sources'.format(data['date']), data['source'])

            # Format: <YYYY-MM-DD>|<source> -> set([<asn>, ...])
            self.ardb_storage.sadd('{}|{}'.format(data['date'], data['source']),
                                   ris_entry['asn'])
            # Format: <YYYY-MM-DD>|<source>|<asn> -> set([<prefix>, ...])
            self.ardb_storage.sadd('{}|{}|{}'.format(data['date'], data['source'], ris_entry['asn']),
                                   ris_entry['prefix'])

            # Format: <YYYY-MM-DD>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...])
            self.ardb_storage.sadd('{}|{}|{}|{}'.format(data['date'], data['source'],
                                                        ris_entry['asn'],
                                                        ris_entry['prefix']),
                                   '{}|{}'.format(data['ip'], data['datetime']))
            self.redis_sanitized.delete(uuid)
|
||||
|
||||
|
||||
class StatsRIPE():
    """Minimal client for the RIPEstat Data API."""

    def __init__(self, sourceapp='bgpranking-ng - CIRCL'):
        self.url = "https://stat.ripe.net/data/{method}/data.json?{parameters}"
        self.url_parameters = {'sourceapp': sourceapp}

    async def _query(self, method: str, resource: str) -> dict:
        """Run `method` against `resource` and return the decoded JSON.

        Factors out the URL building duplicated in both public methods.
        NOTE(review): requests.get is blocking inside an async method;
        consider an async HTTP client.
        """
        self.url_parameters['resource'] = resource
        parameters = '&'.join(['='.join(item) for item in self.url_parameters.items()])
        url = self.url.format(method=method, parameters=parameters)
        response = requests.get(url)
        return response.json()

    async def network_info(self, ip: str) -> dict:
        """Prefix and ASN(s) announcing `ip`."""
        return await self._query('network-info', ip)

    async def prefix_overview(self, prefix: str) -> dict:
        """Textual description of `prefix` (holder, block name, ...)."""
        return await self._query('prefix-overview', prefix)
|
||||
|
||||
|
||||
class RoutingInformationServiceFetcher():
    """Resolve prefix/ASN/description for queued IPs via the RIPEstat API."""

    def __init__(self, loglevel: int=logging.DEBUG):
        self.__init_logger(loglevel)
        self.ris_cache = Redis(host='localhost', port=6381, db=0)
        self.logger.debug('Starting RIS fetcher')
        self.ripe = StatsRIPE()

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
        self.logger.setLevel(loglevel)

    async def fetch(self):
        """Drain 'for_ris_lookup', caching prefix/ASN/description per IP."""
        while True:
            ip = self.ris_cache.spop('for_ris_lookup')
            if not ip:
                break
            # This redis connection does not use decode_responses.
            ip = ip.decode()
            network_info = await self.ripe.network_info(ip)
            prefix = network_info['data']['prefix']
            asns = network_info['data']['asns']
            if not asns or not prefix:
                self.logger.warning('The IP {} does not seem to be announced'.format(ip))
                continue
            prefix_overview = await self.ripe.prefix_overview(prefix)
            description = prefix_overview['data']['block']['desc']
            if not description:
                description = prefix_overview['data']['block']['name']
            # NOTE(review): hmset on the same key per ASN means only the last
            # ASN of the list survives — confirm whether multi-origin ASes
            # should be kept.
            for asn in asns:
                self.ris_cache.hmset(ip, {'asn': asn, 'prefix': prefix,
                                          'description': description})
|
||||
|
||||
|
||||
class DeepArchive():
    """Compress monthly batches of processed files older than two months."""

    def __init__(self, config_file: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG):
        '''Archive every file older than 2 months.'''
        with open(config_file, 'r') as f:
            module_parameters = json.load(f)
        self.vendor = module_parameters['vendor']
        self.listname = module_parameters['name']
        self.directory = storage_directory / self.vendor / self.listname / 'archive'
        safe_create_dir(self.directory)
        self.deep_archive = self.directory / 'deep'
        safe_create_dir(self.deep_archive)
        self.__init_logger(loglevel)

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}-{}-{}'.format(self.__class__.__name__,
                                                          self.vendor, self.listname))
        self.logger.setLevel(loglevel)

    def archive(self):
        """Zip files older than two calendar months into YYYYMM.zip batches."""
        to_archive = defaultdict(list)
        today = date.today()
        # Keep the current month plus the two previous full months.
        last_day_to_keep = date(today.year, today.month, 1) - relativedelta(months=2)
        for p in self.directory.iterdir():
            if not p.is_file():
                continue
            # File names start with an ISO timestamp: '<isoformat>.txt'.
            filedate = parser.parse(p.name.split('.')[0]).date()
            if filedate >= last_day_to_keep:
                continue
            to_archive['{}.zip'.format(filedate.strftime('%Y%m'))].append(p)
        if to_archive:
            self.logger.info('Found old files. Archiving: {}'.format(', '.join(to_archive.keys())))
        else:
            self.logger.debug('No old files.')
        for archivename, path_list in to_archive.items():
            with zipfile.ZipFile(self.deep_archive / archivename, 'x', zipfile.ZIP_DEFLATED) as z:
                for f in path_list:
                    z.write(f, f.name)
            # Delete all the files if the archiving worked out properly
            # (plain loop instead of a side-effect list comprehension).
            for f in path_list:
                f.unlink()
|
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from .exceptions import CreateDirectoryException
|
||||
from redis import StrictRedis
|
||||
from redis.exceptions import ConnectionError
|
||||
from datetime import datetime, timedelta
|
||||
import time
|
||||
|
||||
|
||||
def get_config_path():
    """Locate the `config` directory bundled with the bgpranking package."""
    package_file = sys.modules['bgpranking'].__file__
    return Path(package_file).parent / 'config'
|
||||
|
||||
|
||||
def get_list_storage_path():
    """Storage root for the raw lists, rooted at the active virtualenv."""
    venv_root = os.environ['VIRTUAL_ENV']
    return Path(venv_root)
|
||||
|
||||
|
||||
def get_homedir():
    """Project home directory, read from the BGPRANKING_HOME variable."""
    home = os.environ['BGPRANKING_HOME']
    return Path(home)
|
||||
|
||||
|
||||
def safe_create_dir(to_create: Path):
    """Ensure `to_create` exists as a directory, creating parents as needed.

    Raises CreateDirectoryException when the path exists but is not a
    directory.
    """
    if to_create.exists():
        if not to_create.is_dir():
            raise CreateDirectoryException('The path {} already exists and is not a directory'.format(to_create))
        return  # Already a directory, nothing to do.
    os.makedirs(to_create, exist_ok=True)
|
||||
|
||||
|
||||
def set_running(name: str):
    """Flag component `name` as running in the shared 'running' hash."""
    tracker = StrictRedis(host='localhost', port=6582, db=1, decode_responses=True)
    tracker.hset('running', name, 1)
|
||||
|
||||
|
||||
def unset_running(name: str):
    """Remove component `name` from the shared 'running' hash."""
    tracker = StrictRedis(host='localhost', port=6582, db=1, decode_responses=True)
    tracker.hdel('running', name)
|
||||
|
||||
|
||||
def is_running():
    """Return the whole 'running' hash (component name -> flag)."""
    tracker = StrictRedis(host='localhost', port=6582, db=1, decode_responses=True)
    return tracker.hgetall('running')
|
||||
|
||||
|
||||
def shutdown_requested():
    """True when a shutdown was requested, or when redis is unreachable."""
    try:
        tracker = StrictRedis(host='localhost', port=6582, db=1, decode_responses=True)
        return tracker.exists('shutdown')
    except (ConnectionRefusedError, ConnectionError):
        # An unreachable redis is treated as a shutdown request.
        return True
|
||||
|
||||
|
||||
def long_sleep(sleep_in_sec: int, shutdown_check: int=10):
    """Sleep `sleep_in_sec` seconds, polling for shutdown every `shutdown_check` seconds.

    Returns False as soon as a shutdown is requested, True otherwise.
    Fix: the original always slept full `shutdown_check` chunks and could
    oversleep the deadline by up to `shutdown_check` seconds; the nap is
    now capped at the remaining time.
    """
    sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
    while sleep_until > datetime.now():
        remaining = (sleep_until - datetime.now()).total_seconds()
        time.sleep(min(shutdown_check, max(remaining, 0)))
        if shutdown_requested():
            return False
    return True
|
|
@ -10,7 +10,7 @@ import logging
|
|||
from pid import PidFile, PidFileError
|
||||
import json
|
||||
|
||||
from .libs.helpers import safe_create_dir
|
||||
from .libs.helpers import safe_create_dir, set_running, unset_running
|
||||
|
||||
|
||||
class Fetcher():
|
||||
|
@ -127,11 +127,12 @@ class Fetcher():
|
|||
'''Fetch & store the list'''
|
||||
if not self.fetcher:
|
||||
return
|
||||
set_running('{}-{}-{}'.format(self.__class__.__name__, self.vendor, self.listname))
|
||||
try:
|
||||
with PidFile('{}.pid'.format(self.listname), piddir=self.meta):
|
||||
if not await self.__newer():
|
||||
unset_running('{}-{}-{}'.format(self.__class__.__name__, self.vendor, self.listname))
|
||||
return
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(self.url) as r:
|
||||
content = await r.content.read()
|
||||
|
@ -140,5 +141,8 @@ class Fetcher():
|
|||
self.logger.info('Got a new file \o/')
|
||||
with (self.directory / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
|
||||
f.write(content)
|
||||
unset_running('{}-{}-{}'.format(self.__class__.__name__, self.vendor, self.listname))
|
||||
except PidFileError:
|
||||
self.logger.info('Fetcher already running')
|
||||
finally:
|
||||
unset_running('{}-{}-{}'.format(self.__class__.__name__, self.vendor, self.listname))
|
|
@ -0,0 +1,38 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from redis import StrictRedis
|
||||
|
||||
|
||||
class Monitor():
    """Read-only view over the redis instances used by the pipeline."""

    def __init__(self):
        self.intake = StrictRedis(host='localhost', port=6579, db=0, decode_responses=True)
        self.sanitize = StrictRedis(host='localhost', port=6580, db=0, decode_responses=True)
        self.ris_cache = StrictRedis(host='localhost', port=6581, db=0, decode_responses=True)
        self.prefix_cache = StrictRedis(host='localhost', port=6582, db=0, decode_responses=True)
        self.running = StrictRedis(host='localhost', port=6582, db=1, decode_responses=True)
        self.storage = StrictRedis(host='localhost', port=16579, decode_responses=True)

    def get_running(self):
        """Return the hash of components currently flagged as running."""
        return self.running.hgetall('running')

    # Backward-compatible alias for the original (misspelled) method name.
    get_runinng = get_running

    def info_prefix_cache(self):
        """Summary of the prefix cache state (dump names, ASN count)."""
        to_return = {'IPv6 Dump': '', 'IPv4 Dump': '', 'Number ASNs': 0}
        if self.prefix_cache.exists('ready'):
            v6_dump = self.prefix_cache.get('current|v6')
            v4_dump = self.prefix_cache.get('current|v4')
            number_as = self.prefix_cache.scard('asns')
            to_return['IPv6 Dump'] = v6_dump
            to_return['IPv4 Dump'] = v4_dump
            to_return['Number ASNs'] = number_as
        return to_return

    def get_values(self):
        """Queue lengths of every processing stage, for dashboards."""
        ips_in_intake = self.intake.scard('intake')
        waiting_for_ris_lookup = self.ris_cache.scard('for_ris_lookup')
        ready_to_insert = self.sanitize.scard('to_insert')
        prefix_db_ready = self.prefix_cache.exists('ready')
        return {'Non-parsed IPs': ips_in_intake, 'Parsed IPs': ready_to_insert,
                'Awaiting prefix lookup': waiting_for_ris_lookup,
                'Prefix database ready': prefix_db_ready}
|
|
@ -14,7 +14,7 @@ import importlib
|
|||
from typing import List
|
||||
import types
|
||||
|
||||
from .libs.helpers import safe_create_dir
|
||||
from .libs.helpers import safe_create_dir, set_running, unset_running
|
||||
|
||||
|
||||
class RawFilesParser():
|
||||
|
@ -26,7 +26,7 @@ class RawFilesParser():
|
|||
self.vendor = module_parameters['vendor']
|
||||
self.listname = module_parameters['name']
|
||||
if 'parser' in module_parameters:
|
||||
self.parse_raw_file = types.MethodType(importlib.import_module(module_parameters['parser'], 'listimport').parse_raw_file, self)
|
||||
self.parse_raw_file = types.MethodType(importlib.import_module(module_parameters['parser'], 'bgpranking').parse_raw_file, self)
|
||||
self.source = '{}-{}'.format(self.vendor, self.listname)
|
||||
self.directory = storage_directory / self.vendor / self.listname
|
||||
safe_create_dir(self.directory)
|
||||
|
@ -55,7 +55,8 @@ class RawFilesParser():
|
|||
self.datetime = datetime.now()
|
||||
return self.extract_ipv4(f.getvalue())
|
||||
|
||||
async def parse_raw_files(self):
|
||||
def parse_raw_files(self):
|
||||
set_running(self.source)
|
||||
for filepath in self.files_to_parse:
|
||||
self.logger.debug('Parsing {}, {} to go.'.format(filepath, len(self.files_to_parse) - 1))
|
||||
with open(filepath, 'rb') as f:
|
||||
|
@ -68,6 +69,7 @@ class RawFilesParser():
|
|||
p.sadd('intake', uuid)
|
||||
p.execute()
|
||||
self._archive(filepath)
|
||||
unset_running(self.source)
|
||||
|
||||
def _archive(self, filepath: Path):
|
||||
'''After processing, move file to the archive directory'''
|
|
@ -9,6 +9,9 @@ import gzip
|
|||
from io import BytesIO
|
||||
from collections import defaultdict
|
||||
import re
|
||||
import time
|
||||
from .libs.helpers import set_running, unset_running
|
||||
|
||||
|
||||
# Dataset source: Routeviews Prefix to AS mappings Dataset for IPv4 and IPv6
|
||||
# http://www.caida.org/data/routing/routeviews-prefix2as.xml
|
||||
|
@ -18,7 +21,7 @@ class PrefixDatabase():
|
|||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.__init_logger(loglevel)
|
||||
self.redis_cache = Redis(host='localhost', port=6582, db=0, decode_responses=True)
|
||||
self.prefix_cache = Redis(host='localhost', port=6582, db=0, decode_responses=True)
|
||||
self.ipv6_url = 'http://data.caida.org/datasets/routing/routeviews6-prefix2as/{}'
|
||||
self.ipv4_url = 'http://data.caida.org/datasets/routing/routeviews-prefix2as/{}'
|
||||
|
||||
|
@ -26,11 +29,20 @@ class PrefixDatabase():
|
|||
self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
|
||||
self.logger.setLevel(loglevel)
|
||||
|
||||
def update_required(self):
|
||||
v4_is_new, v4_path = self._has_new('v4', self.ipv4_url)
|
||||
v6_is_new, v6_path = self._has_new('v6', self.ipv6_url)
|
||||
if any([v4_is_new, v6_is_new]):
|
||||
self.logger.info('Prefix update required.')
|
||||
else:
|
||||
self.logger.debug('No prefix update required.')
|
||||
return any([v4_is_new, v6_is_new])
|
||||
|
||||
def _has_new(self, address_family, root_url):
|
||||
r = requests.get(root_url.format('pfx2as-creation.log'))
|
||||
last_entry = r.text.split('\n')[-2]
|
||||
path = last_entry.split('\t')[-1]
|
||||
if path == self.redis_cache.get('current|{}'.format(address_family)):
|
||||
if path == self.prefix_cache.get('current|{}'.format(address_family)):
|
||||
self.logger.debug('Same file already loaded: {}'.format(path))
|
||||
return False, path
|
||||
return True, path
|
||||
|
@ -42,13 +54,13 @@ class PrefixDatabase():
|
|||
with gzip.open(BytesIO(r.content), 'r') as f:
|
||||
for line in f:
|
||||
prefix, length, asns = line.decode().strip().split('\t')
|
||||
# The meaning of AS set and multi-origin AS in unclear. Tacking the first ASN in the list only.
|
||||
# The meaning of AS set and multi-origin AS in unclear. Taking the first ASN in the list only.
|
||||
asn = re.split('[,_]', asns)[0]
|
||||
network = ip_network('{}/{}'.format(prefix, length))
|
||||
to_import[asn][address_family].add(str(network))
|
||||
to_import[asn]['ipcount'] += network.num_addresses
|
||||
|
||||
p = self.redis_cache.pipeline()
|
||||
p = self.prefix_cache.pipeline()
|
||||
p.sadd('asns', *to_import.keys())
|
||||
for asn, data in to_import.items():
|
||||
p.sadd('{}|{}'.format(asn, address_family), *data[address_family])
|
||||
|
@ -58,10 +70,17 @@ class PrefixDatabase():
|
|||
return True
|
||||
|
||||
def load_prefixes(self):
|
||||
set_running(self.__class__.__name__)
|
||||
self.prefix_cache.delete('ready')
|
||||
self.logger.info('Prefix update starting in a few seconds.')
|
||||
time.sleep(15)
|
||||
v4_is_new, v4_path = self._has_new('v4', self.ipv4_url)
|
||||
v6_is_new, v6_path = self._has_new('v6', self.ipv6_url)
|
||||
|
||||
if v4_is_new or v6_is_new:
|
||||
self.redis_cache.flushdb()
|
||||
self._init_routes('v6', self.ipv6_url, v6_path)
|
||||
self._init_routes('v4', self.ipv4_url, v4_path)
|
||||
self.prefix_cache.flushdb()
|
||||
# TODO: Add a catchall for everything that isn't announced so we can track that down later on
|
||||
self._init_routes('v6', self.ipv6_url, v6_path)
|
||||
self._init_routes('v4', self.ipv4_url, v4_path)
|
||||
self.prefix_cache.set('ready', 1)
|
||||
self.logger.info('Prefix update complete.')
|
||||
unset_running(self.__class__.__name__)
|
|
@ -0,0 +1,71 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from redis import Redis
|
||||
|
||||
import time
|
||||
import pytricia
|
||||
import ipaddress
|
||||
from .libs.helpers import shutdown_requested, set_running, unset_running
|
||||
|
||||
|
||||
class RISPrefixLookup():
    """Resolve queued IPs to their announcing prefix/ASN via local Patricia trees."""

    def __init__(self, loglevel: int=logging.DEBUG):
        self.__init_logger(loglevel)
        self.logger.info('Starting RIS Prefix fetcher')
        self.prefix_db = Redis(host='localhost', port=6582, db=0, decode_responses=True)
        self.longest_prefix_matching = Redis(host='localhost', port=6581, db=0, decode_responses=True)
        self.tree_v4 = pytricia.PyTricia()
        self.tree_v6 = pytricia.PyTricia(128)
        self.init_tree()

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
        self.logger.setLevel(loglevel)

    def cache_prefix(self, pipe, ip, prefix, asns):
        """Queue the prefix/ASN entry for `ip`, with a 12h TTL."""
        pipe.hmset(ip, {'asn': asns, 'prefix': prefix})
        pipe.expire(ip, 43200)  # 12H

    def init_tree(self):
        """Load every known prefix from redis into the v4/v6 trees."""
        for asn in self.prefix_db.smembers('asns'):
            for prefix in self.prefix_db.smembers('{}|{}'.format(asn, 'v4')):
                self.tree_v4[prefix] = asn
            for prefix in self.prefix_db.smembers('{}|{}'.format(asn, 'v6')):
                self.tree_v6[prefix] = asn
        # Catch-all default routes so lookups always match something.
        self.tree_v4['0.0.0.0/0'] = 0
        # BUG FIX: the IPv6 default route belongs in the v6 tree
        # (the original inserted '::/0' into tree_v4).
        self.tree_v6['::/0'] = 0

    def run(self):
        """Main loop: resolve queued IPs against the trees and cache results."""
        set_running(self.__class__.__name__)
        while True:
            if shutdown_requested():
                break
            if not self.prefix_db.get('ready'):
                self.logger.debug('Prefix database not ready.')
                time.sleep(5)
                continue
            ips = self.longest_prefix_matching.spop('for_ris_lookup', 100)
            if not ips:  # TODO: add a check against something to stop the loop
                self.logger.debug('Nothing to lookup')
                break
            pipe = self.longest_prefix_matching.pipeline(transaction=False)
            for ip in ips:
                if self.longest_prefix_matching.exists(ip):
                    self.logger.debug('Already cached: {}'.format(ip))
                    continue
                ip = ipaddress.ip_address(ip)
                if ip.version == 4:
                    prefix = self.tree_v4.get_key(ip)
                    asns = self.tree_v4.get(ip)
                else:
                    prefix = self.tree_v6.get_key(ip)
                    asns = self.tree_v6.get(ip)
                if not prefix:
                    self.logger.warning('The IP {} does not seem to be announced'.format(ip))
                    continue
                self.cache_prefix(pipe, ip, prefix, asns)
            pipe.execute()
        unset_running(self.__class__.__name__)
|
|
@ -0,0 +1,57 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from dateutil import parser
|
||||
import logging
|
||||
from redis import StrictRedis
|
||||
from .libs.helpers import shutdown_requested, set_running, unset_running
|
||||
|
||||
import ipaddress
|
||||
|
||||
|
||||
class Sanitizer():
|
||||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.__init_logger(loglevel)
|
||||
self.redis_intake = StrictRedis(host='localhost', port=6579, db=0, decode_responses=True)
|
||||
self.redis_sanitized = StrictRedis(host='localhost', port=6580, db=0, decode_responses=True)
|
||||
self.ris_cache = StrictRedis(host='localhost', port=6581, db=0, decode_responses=True)
|
||||
self.logger.debug('Starting import')
|
||||
|
||||
def __init_logger(self, loglevel):
|
||||
self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
|
||||
self.logger.setLevel(loglevel)
|
||||
|
||||
def sanitize(self):
|
||||
set_running(self.__class__.__name__)
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
uuids = self.redis_intake.spop('intake', 100)
|
||||
if not uuids:
|
||||
break
|
||||
for_ris_lookup = []
|
||||
pipeline = self.redis_sanitized.pipeline(transaction=False)
|
||||
for uuid in uuids:
|
||||
data = self.redis_intake.hgetall(uuid)
|
||||
try:
|
||||
ip = ipaddress.ip_address(data['ip'])
|
||||
except ValueError:
|
||||
self.logger.info('Invalid IP address: {}'.format(data['ip']))
|
||||
continue
|
||||
if not ip.is_global:
|
||||
self.logger.info('The IP address {} is not global'.format(data['ip']))
|
||||
continue
|
||||
|
||||
date = parser.parse(data['datetime']).date().isoformat()
|
||||
# NOTE: to consider: discard data with an old timestamp (define old)
|
||||
|
||||
# Add to temporay DB for further processing
|
||||
for_ris_lookup.append(str(ip))
|
||||
pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'],
|
||||
'date': date, 'datetime': data['datetime']})
|
||||
pipeline.sadd('to_insert', uuid)
|
||||
pipeline.execute()
|
||||
self.redis_intake.delete(*uuid)
|
||||
self.ris_cache.sadd('for_ris_lookup', *for_ris_lookup)
|
||||
unset_running(self.__class__.__name__)
|
Binary file not shown.
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from bgpranking.archive import DeepArchive
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from bgpranking.libs.helpers import get_config_path, get_list_storage_path
|
||||
from pid import PidFile, PidFileError
|
||||
|
||||
|
||||
logger = logging.getLogger('Archiver')
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
# NOTE:
|
||||
# * Supposed to run once every ~2 months
|
||||
|
||||
|
||||
class ModulesArchiver():
|
||||
|
||||
def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.INFO):
|
||||
if not config_dir:
|
||||
config_dir = get_config_path()
|
||||
if not storage_directory:
|
||||
self.storage_directory = get_list_storage_path()
|
||||
modules_config = config_dir / 'modules'
|
||||
modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
|
||||
self.modules = [DeepArchive(path, self.storage_directory, loglevel) for path in modules_paths]
|
||||
|
||||
def archive(self):
|
||||
[module.archive() for module in self.modules]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
archiver = ModulesArchiver()
|
||||
try:
|
||||
with PidFile(piddir=archiver.storage_directory):
|
||||
logger.info('Archiving...')
|
||||
archiver.archive()
|
||||
logger.info('... done.')
|
||||
except PidFileError:
|
||||
logger.warning('Archiver already running, skip.')
|
|
@ -2,8 +2,8 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from listimport.dbinsert import DatabaseInsert
|
||||
from bgpranking.dbinsert import DatabaseInsert
|
||||
from bgpranking.libs.helpers import long_sleep, shutdown_requested
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
@ -15,11 +15,15 @@ class DBInsertManager():
|
|||
self.loglevel = loglevel
|
||||
self.dbinsert = DatabaseInsert(loglevel)
|
||||
|
||||
async def run_insert(self):
|
||||
await asyncio.gather(self.dbinsert.insert())
|
||||
def run_insert(self):
|
||||
self.dbinsert.insert()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = DBInsertManager()
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(modules_manager.run_insert())
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
modules_manager.run_insert()
|
||||
if not long_sleep(120):
|
||||
break
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from bgpranking.libs.helpers import long_sleep, shutdown_requested
|
||||
import aiohttp
|
||||
|
||||
from bgpranking.modulesfetcher import Fetcher
|
||||
from bgpranking.libs.helpers import get_config_path, get_list_storage_path
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
logger = logging.getLogger('Fetcher')
|
||||
|
||||
|
||||
class ModulesManager():
|
||||
|
||||
def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
|
||||
if not config_dir:
|
||||
config_dir = get_config_path()
|
||||
if not storage_directory:
|
||||
storage_directory = get_list_storage_path()
|
||||
modules_config = config_dir / 'modules'
|
||||
modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
|
||||
self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths]
|
||||
|
||||
async def run_fetchers(self):
|
||||
await asyncio.gather(
|
||||
*[module.fetch_list() for module in self.modules if module.fetcher]
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = ModulesManager()
|
||||
loop = asyncio.get_event_loop()
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
try:
|
||||
loop.run_until_complete(modules_manager.run_fetchers())
|
||||
except aiohttp.client_exceptions.ClientConnectorError:
|
||||
logger.critical('Exception while fetching lists.')
|
||||
long_sleep(60)
|
||||
continue
|
||||
if not long_sleep(3600):
|
||||
break
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from bgpranking.prefixdb import PrefixDatabase
|
||||
from bgpranking.libs.helpers import long_sleep, shutdown_requested
|
||||
import requests
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
logger = logging.getLogger('PrefixDB Fetcher')
|
||||
|
||||
|
||||
class PrefixDBManager():
|
||||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.prefix_db = PrefixDatabase(loglevel=loglevel)
|
||||
|
||||
def load_prefixes(self):
|
||||
self.prefix_db.load_prefixes()
|
||||
|
||||
def needs_update(self):
|
||||
return self.prefix_db.update_required()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
p = PrefixDBManager()
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
try:
|
||||
if p.needs_update():
|
||||
p.load_prefixes()
|
||||
except requests.exceptions.ConnectionError:
|
||||
logger.critical('Unable to download the prefix database.')
|
||||
long_sleep(60)
|
||||
continue
|
||||
if not long_sleep(3600):
|
||||
break
|
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from bgpranking.monitor import Monitor
|
||||
import logging
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class MonitorManager():
|
||||
|
||||
def __init__(self, loglevel: int=logging.INFO):
|
||||
self.monitor = Monitor()
|
||||
|
||||
def get_values(self):
|
||||
generic = self.monitor.get_values()
|
||||
prefix_cache = self.monitor.info_prefix_cache()
|
||||
running = self.monitor.get_runinng()
|
||||
return generic, prefix_cache, running
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
m = MonitorManager()
|
||||
print(m.get_values())
|
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from bgpranking.parser import RawFilesParser
|
||||
from bgpranking.libs.helpers import get_config_path, get_list_storage_path
|
||||
from bgpranking.libs.helpers import long_sleep, shutdown_requested
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class ParserManager():
|
||||
|
||||
def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
|
||||
if not config_dir:
|
||||
config_dir = get_config_path()
|
||||
if not storage_directory:
|
||||
storage_directory = get_list_storage_path()
|
||||
modules_config = config_dir / 'modules'
|
||||
modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
|
||||
self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths]
|
||||
|
||||
def run_intake(self):
|
||||
[module.parse_raw_files() for module in self.modules]
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser_manager = ParserManager()
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
parser_manager.run_intake()
|
||||
if not long_sleep(120):
|
||||
break
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from bgpranking.risfetcher import RISPrefixLookup
|
||||
from bgpranking.libs.helpers import long_sleep, shutdown_requested
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class RISLookupManager():
|
||||
|
||||
def __init__(self, loglevel: int=logging.INFO):
|
||||
self.ris_fetcher = RISPrefixLookup(loglevel=loglevel)
|
||||
|
||||
def run_fetcher(self):
|
||||
self.ris_fetcher.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = RISLookupManager()
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
modules_manager.run_fetcher()
|
||||
if not long_sleep(120):
|
||||
break
|
|
@ -0,0 +1,102 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from bgpranking.libs.helpers import get_homedir
|
||||
from subprocess import Popen
|
||||
import time
|
||||
from pathlib import Path
|
||||
from redis import Redis
|
||||
|
||||
import argparse
|
||||
|
||||
|
||||
def launch_cache(storage_directory: Path=None):
|
||||
if not storage_directory:
|
||||
storage_directory = get_homedir()
|
||||
Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache'))
|
||||
|
||||
|
||||
def shutdown_cache(storage_directory: Path=None):
|
||||
if not storage_directory:
|
||||
storage_directory = get_homedir()
|
||||
Popen(["./shutdown_redis.sh"], cwd=(storage_directory / 'cache'))
|
||||
|
||||
|
||||
def launch_temp(storage_directory: Path=None):
|
||||
if not storage_directory:
|
||||
storage_directory = get_homedir()
|
||||
Popen(["./run_redis.sh"], cwd=(storage_directory / 'temp'))
|
||||
|
||||
|
||||
def shutdown_temp(storage_directory: Path=None):
|
||||
if not storage_directory:
|
||||
storage_directory = get_homedir()
|
||||
Popen(["./shutdown_redis.sh"], cwd=(storage_directory / 'temp'))
|
||||
|
||||
|
||||
def launch_storage(storage_directory: Path=None):
|
||||
if not storage_directory:
|
||||
storage_directory = get_homedir()
|
||||
Popen(["./run_ardb.sh"], cwd=(storage_directory / 'storage'))
|
||||
|
||||
|
||||
def shutdown_storage(storage_directory: Path=None):
|
||||
if not storage_directory:
|
||||
storage_directory = get_homedir()
|
||||
Popen(["./shutdown_ardb.sh"], cwd=(storage_directory / 'storage'))
|
||||
|
||||
|
||||
def check_running(host, port):
|
||||
r = Redis(host=host, port=port)
|
||||
return r.ping()
|
||||
|
||||
|
||||
def launch_all():
|
||||
launch_cache()
|
||||
launch_temp()
|
||||
launch_storage()
|
||||
|
||||
|
||||
def check_all(stop=False):
|
||||
backends = [['127.0.0.1', 6579, False], ['127.0.0.1', 6580, False],
|
||||
['127.0.0.1', 6581, False], ['127.0.0.1', 6582, False],
|
||||
['127.0.0.1', 16579, False]]
|
||||
while True:
|
||||
for b in backends:
|
||||
try:
|
||||
b[2] = check_running(b[0], b[1])
|
||||
except Exception:
|
||||
b[2] = False
|
||||
if stop:
|
||||
if not any(b[2] for b in backends):
|
||||
break
|
||||
else:
|
||||
if all(b[2] for b in backends):
|
||||
break
|
||||
for b in backends:
|
||||
if not stop and not b[2]:
|
||||
print('Waiting on {}:{}'.format(b[0], b[1]))
|
||||
if stop and b[2]:
|
||||
print('Waiting on {}:{}'.format(b[0], b[1]))
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def stop_all():
|
||||
shutdown_cache()
|
||||
shutdown_temp()
|
||||
shutdown_storage()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Manage backend DBs.')
|
||||
parser.add_argument("--start", action='store_true', default=False, help="Start all")
|
||||
parser.add_argument("--stop", action='store_true', default=False, help="Stop all")
|
||||
parser.add_argument("--status", action='store_true', default=True, help="Show status")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.start:
|
||||
launch_all()
|
||||
if args.stop:
|
||||
stop_all()
|
||||
if not args.stop and args.status:
|
||||
check_all()
|
|
@ -2,8 +2,8 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from listimport.sanitizer import Sanitizer
|
||||
from bgpranking.sanitizer import Sanitizer
|
||||
from bgpranking.libs.helpers import long_sleep, shutdown_requested
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.WARNING, datefmt='%I:%M:%S')
|
||||
|
@ -15,11 +15,15 @@ class SanitizerManager():
|
|||
self.loglevel = loglevel
|
||||
self.sanitizer = Sanitizer(loglevel)
|
||||
|
||||
async def run_sanitizer(self):
|
||||
await asyncio.gather(self.sanitizer.sanitize())
|
||||
def run_sanitizer(self):
|
||||
self.sanitizer.sanitize()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = SanitizerManager()
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(modules_manager.run_sanitizer())
|
||||
while True:
|
||||
if shutdown_requested():
|
||||
break
|
||||
modules_manager.run_sanitizer()
|
||||
if not long_sleep(120):
|
||||
break
|
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from bgpranking.libs.helpers import is_running
|
||||
import time
|
||||
from redis import StrictRedis
|
||||
|
||||
if __name__ == '__main__':
|
||||
r = StrictRedis(host='localhost', port=6582, db=1, decode_responses=True)
|
||||
r.set('shutdown', 1)
|
||||
while True:
|
||||
running = is_running()
|
||||
print(running)
|
||||
if not running:
|
||||
break
|
||||
time.sleep(10)
|
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from subprocess import Popen
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
p = Popen(['run_backend.py', '--start'])
|
||||
p.wait()
|
||||
Popen(['loadprefixes.py'])
|
||||
Popen(['rislookup.py'])
|
||||
Popen(['fetcher.py'])
|
||||
Popen(['parser.py'])
|
||||
Popen(['sanitizer.py'])
|
||||
Popen(['dbinsert.py'])
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from subprocess import Popen
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
p = Popen(['shutdown.py'])
|
||||
p.wait()
|
||||
Popen(['run_backend.py', '--stop'])
|
36
fetcher.py
36
fetcher.py
|
@ -1,36 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from listimport.modulesfetcher import Fetcher
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class ModulesManager():
|
||||
|
||||
def __init__(self, config_dir: Path=Path('listimport', 'modules_config'),
|
||||
storage_directory: Path=Path('rawdata'),
|
||||
loglevel: int=logging.DEBUG):
|
||||
self.config_dir = config_dir
|
||||
print(config_dir)
|
||||
self.storage_directory = storage_directory
|
||||
self.loglevel = loglevel
|
||||
self.modules_paths = [modulepath for modulepath in self.config_dir.glob('*.json')]
|
||||
self.modules = [Fetcher(path, self.storage_directory, self.loglevel)
|
||||
for path in self.modules_paths]
|
||||
|
||||
async def run_fetchers(self):
|
||||
await asyncio.gather(
|
||||
*[module.fetch_list() for module in self.modules if module.fetcher]
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = ModulesManager()
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(modules_manager.run_fetchers())
|
34
intake.py
34
intake.py
|
@ -1,34 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
from listimport.parser import RawFilesParser
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class IntakeManager():
|
||||
|
||||
def __init__(self, config_dir: Path=Path('listimport', 'modules_config'),
|
||||
storage_directory: Path=Path('rawdata'),
|
||||
loglevel: int=logging.DEBUG):
|
||||
self.config_dir = config_dir
|
||||
self.storage_directory = storage_directory
|
||||
self.loglevel = loglevel
|
||||
self.modules_paths = [modulepath for modulepath in self.config_dir.glob('*.json')]
|
||||
self.modules = [RawFilesParser(path, self.storage_directory, self.loglevel)
|
||||
for path in self.modules_paths]
|
||||
|
||||
async def run_intake(self):
|
||||
await asyncio.gather(
|
||||
*[module.parse_raw_files() for module in self.modules]
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = IntakeManager()
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(modules_manager.run_intake())
|
|
@ -1,12 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from .exceptions import CreateDirectoryException
|
||||
|
||||
|
||||
def safe_create_dir(to_create: Path):
|
||||
if to_create.exists() and not to_create.is_dir():
|
||||
raise CreateDirectoryException('The path {} already exists and is not a directory'.format(to_create))
|
||||
os.makedirs(to_create, exist_ok=True)
|
|
@ -1,60 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from redis import Redis
|
||||
|
||||
import time
|
||||
import pytricia
|
||||
import ipaddress
|
||||
|
||||
|
||||
class RISPrefixLookup():
|
||||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.__init_logger(loglevel)
|
||||
self.logger.debug('Starting RIS Prefix fetcher')
|
||||
self.prefix_db = Redis(host='localhost', port=6582, db=0, decode_responses=True)
|
||||
self.longest_prefix_matching = Redis(host='localhost', port=6581, db=0, decode_responses=True)
|
||||
self.tree_v4 = pytricia.PyTricia()
|
||||
self.tree_v6 = pytricia.PyTricia(128)
|
||||
self.init_tree()
|
||||
|
||||
def __init_logger(self, loglevel):
|
||||
self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
|
||||
self.logger.setLevel(loglevel)
|
||||
|
||||
def cache_prefix(self, ip, prefix, asns):
|
||||
p = self.longest_prefix_matching.pipeline()
|
||||
p.hmset(ip, {'asn': asns, 'prefix': prefix})
|
||||
p.expire(ip, 43200) # 12H
|
||||
p.execute()
|
||||
|
||||
def init_tree(self):
|
||||
for asn in self.prefix_db.smembers('asns'):
|
||||
for prefix in self.prefix_db.smembers('{}|{}'.format(asn, 'v4')):
|
||||
self.tree_v4[prefix] = asn
|
||||
for prefix in self.prefix_db.smembers('{}|{}'.format(asn, 'v6')):
|
||||
self.tree_v6[prefix] = asn
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
ip = self.longest_prefix_matching.spop('for_ris_lookup')
|
||||
if not ip: # TODO: add a check against something to stop the loop
|
||||
self.logger.debug('Nothing to lookup')
|
||||
time.sleep(10)
|
||||
continue
|
||||
if self.longest_prefix_matching.exists(ip):
|
||||
self.logger.debug('Already cached: {}'.format(ip))
|
||||
continue
|
||||
ip = ipaddress.ip_address(ip)
|
||||
if ip.version == 4:
|
||||
prefix = self.tree_v4.get_key(ip)
|
||||
asns = self.tree_v4.get(ip)
|
||||
else:
|
||||
prefix = self.tree_v6.get_key(ip)
|
||||
asns = self.tree_v6.get(ip)
|
||||
if not prefix:
|
||||
self.logger.warning('The IP {} does not seem to be announced'.format(ip))
|
||||
continue
|
||||
self.cache_prefix(ip, prefix, asns)
|
|
@ -1,49 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from dateutil import parser
|
||||
import logging
|
||||
from redis import Redis
|
||||
|
||||
import ipaddress
|
||||
|
||||
|
||||
class Sanitizer():
|
||||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.__init_logger(loglevel)
|
||||
self.redis_intake = Redis(host='localhost', port=6579, db=0, decode_responses=True)
|
||||
self.redis_sanitized = Redis(host='localhost', port=6580, db=0, decode_responses=True)
|
||||
self.ris_cache = Redis(host='localhost', port=6581, db=0, decode_responses=True)
|
||||
self.logger.debug('Starting import')
|
||||
|
||||
def __init_logger(self, loglevel):
|
||||
self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
|
||||
self.logger.setLevel(loglevel)
|
||||
|
||||
async def sanitize(self):
|
||||
while True:
|
||||
uuid = self.redis_intake.spop('intake')
|
||||
if not uuid:
|
||||
break
|
||||
data = self.redis_intake.hgetall(uuid)
|
||||
try:
|
||||
ip = ipaddress.ip_address(data['ip'])
|
||||
except ValueError:
|
||||
self.logger.info('Invalid IP address: {}'.format(data['ip']))
|
||||
continue
|
||||
if not ip.is_global:
|
||||
self.logger.info('The IP address {} is not global'.format(data['ip']))
|
||||
continue
|
||||
|
||||
date = parser.parse(data['datetime']).date().isoformat()
|
||||
# NOTE: to consider: discard data with an old timestamp (define old)
|
||||
|
||||
# Add to temporay DB for further processing
|
||||
self.ris_cache.sadd('for_ris_lookup', str(ip))
|
||||
pipeline = self.redis_sanitized.pipeline()
|
||||
pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'],
|
||||
'date': date, 'datetime': data['datetime']})
|
||||
pipeline.sadd('to_insert', uuid)
|
||||
pipeline.execute()
|
||||
self.redis_intake.delete(uuid)
|
23
ranking.py
23
ranking.py
|
@ -1,23 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from listimport.initranking import PrefixDatabase
|
||||
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class RankingManager():
|
||||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.prefix_db = PrefixDatabase(loglevel=loglevel)
|
||||
|
||||
def load_prefixes(self):
|
||||
self.prefix_db.load_prefixes()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
rm = RankingManager()
|
||||
rm.load_prefixes()
|
|
@ -0,0 +1,4 @@
|
|||
git+https://github.com/andymccurdy/redis-py.git
|
||||
python-dateutil
|
||||
git+https://github.com/jsommers/pytricia.git
|
||||
git+https://github.com/trbs/pid.git
|
22
ris.py
22
ris.py
|
@ -1,22 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import logging
|
||||
from listimport.risfetcher import RISPrefixLookup
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
||||
|
||||
class RISManager():
|
||||
|
||||
def __init__(self, loglevel: int=logging.DEBUG):
|
||||
self.ris_fetcher = RISPrefixLookup(loglevel=loglevel)
|
||||
|
||||
def run_fetcher(self):
|
||||
self.ris_fetcher.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
modules_manager = RISManager()
|
||||
modules_manager.run_fetcher()
|
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from setuptools import setup
|
||||
|
||||
|
||||
setup(
|
||||
name='bgpranking',
|
||||
version='0.1',
|
||||
author='Raphaël Vinot',
|
||||
author_email='raphael.vinot@circl.lu',
|
||||
maintainer='Raphaël Vinot',
|
||||
url='https://github.com/D4-project/BGP-Ranking',
|
||||
description='BGP Ranking, the new one..',
|
||||
packages=['bgpranking'],
|
||||
scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
|
||||
'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py',
|
||||
'bin/monitor.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
|
||||
classifiers=[
|
||||
'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Environment :: Console',
|
||||
'Operating System :: POSIX :: Linux',
|
||||
'Intended Audience :: Science/Research',
|
||||
'Intended Audience :: Telecommunications Industry',
|
||||
'Intended Audience :: Information Technology',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Topic :: Security',
|
||||
'Topic :: Internet',
|
||||
],
|
||||
include_package_data=True,
|
||||
package_data={'config': ['config/*/*.conf',
|
||||
'config/modules/*.json']},
|
||||
)
|
Loading…
Reference in New Issue