#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import datetime
import json
import logging
import re

from collections import defaultdict
from datetime import timedelta
from typing import TypeVar, Union, Optional, Dict, Any, List, Tuple

from dateutil.parser import parse
from redis import ConnectionPool, Redis
from redis.connection import UnixDomainSocketConnection

from .default import get_config, get_socket_path, InvalidDateFormat
from .helpers import get_modules
from .statsripe import StatsRIPE

Dates = TypeVar('Dates', datetime.datetime, datetime.date, str)


class BGPRanking():

    def __init__(self) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(get_config('generic', 'loglevel'))
        self.cache_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection,
                                                         path=get_socket_path('cache'), decode_responses=True)
        self.storage = Redis(get_config('generic', 'storage_db_hostname'),
                             get_config('generic', 'storage_db_port'), decode_responses=True)
        self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'),
                              get_config('generic', 'storage_db_port'), decode_responses=True)
        self.ranking = Redis(get_config('generic', 'ranking_db_hostname'),
                             get_config('generic', 'ranking_db_port'))

    @property
    def cache(self):
        return Redis(connection_pool=self.cache_pool, db=1)

    def check_redis_up(self) -> bool:
        return self.cache.ping()

    def __normalize_date(self, date: Optional[Dates]) -> str:
        if not date:
            return datetime.date.today().isoformat()
        if isinstance(date, datetime.datetime):
            return date.date().isoformat()
        elif isinstance(date, datetime.date):
            return date.isoformat()
        elif isinstance(date, str):
            try:
                return parse(date).date().isoformat()
            except ValueError:
                raise InvalidDateFormat('Unable to parse the date. Should be YYYY-MM-DD.')

    def _ranking_cache_wrapper(self, key):
        if not self.cache.exists(key):
            if self.ranking.exists(key):
                try:
                    content: List[Tuple[bytes, float]] = self.ranking.zrangebyscore(key, '-Inf', '+Inf', withscores=True)
                    # Cache for 10 hours
                    self.cache.zadd(key, {value: rank for value, rank in content})
                    self.cache.expire(key, 36000)
                except Exception as e:
                    self.logger.exception(f'Something went poorly when caching {key}.')
                    raise e
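
    # Note: the keys copied from the ranking database into the cache follow the formats
    # built by the methods below, e.g. '{date}|asns|{ipversion}' (global ASN ranking),
    # '{date}|{source}|asns|{ipversion}' (per-source ASN ranking) and
    # '{date}|{source}|{asn}|{ipversion}|prefixes' (prefixes of an ASN for one source).
    # These formats are inferred from the lookups in this class, not from the ranking
    # backend itself.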

    def asns_global_ranking(self, date: Optional[Dates]=None, source: Union[list, str]='',
                            ipversion: str='v4', limit: int=100):
        '''Aggregated ranking of all the ASNs known in the system, weighted by source.'''
        to_return: Dict[str, Any] = {
            'meta': {'ipversion': ipversion, 'limit': limit},
            'source': source,
            'response': set()
        }
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            if isinstance(source, list):
                keys = []
                for s in source:
                    key = f'{d}|{s}|asns|{ipversion}'
                    self._ranking_cache_wrapper(key)
                    keys.append(key)
                # union the ranked sets
                key = '|'.join(sorted(source)) + f'|{d}|asns|{ipversion}'
                if not self.cache.exists(key):
                    self.cache.zunionstore(key, keys)
            else:
                key = f'{d}|{source}|asns|{ipversion}'
        else:
            key = f'{d}|asns|{ipversion}'
        self._ranking_cache_wrapper(key)
        to_return['response'] = self.cache.zrevrange(key, start=0, end=limit, withscores=True)
        return to_return
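
    # Illustrative result shape: asns_global_ranking(limit=10) returns
    # {'meta': {...}, 'source': '', 'response': [(asn, rank), ...]}, highest-ranked ASNs first.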

    def asn_details(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='',
                    ipversion: str='v4'):
        '''Aggregated ranking of all the prefixes announced by the given ASN, weighted by source.'''
        to_return: Dict[str, Any] = {
            'meta': {'asn': asn, 'ipversion': ipversion, 'source': source},
            'response': set()
        }
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            if isinstance(source, list):
                keys = []
                for s in source:
                    key = f'{d}|{s}|{asn}|{ipversion}|prefixes'
                    self._ranking_cache_wrapper(key)
                    keys.append(key)
                # union the ranked sets
                key = '|'.join(sorted(source)) + f'|{d}|{asn}|{ipversion}'
                if not self.cache.exists(key):
                    self.cache.zunionstore(key, keys)
            else:
                key = f'{d}|{source}|{asn}|{ipversion}|prefixes'
        else:
            key = f'{d}|{asn}|{ipversion}'
        self._ranking_cache_wrapper(key)
        to_return['response'] = self.cache.zrevrange(key, start=0, end=-1, withscores=True)
        return to_return
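
    # asn_details() returns the full (prefix, rank) ranking of the ASN for that day,
    # highest rank first (ZREVRANGE ... WITHSCORES).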

    def asn_rank(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='',
                 ipversion: str='v4', with_position: bool=False):
        '''Get the rank of a single ASN, weighted by source.'''
        to_return: Dict[str, Any] = {
            'meta': {'asn': asn, 'ipversion': ipversion,
                     'source': source, 'with_position': with_position},
            'response': 0.0
        }
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            to_return['meta']['source'] = source
            if isinstance(source, list):
                keys = []
                for s in source:
                    key = f'{d}|{s}|{asn}|{ipversion}'
                    self._ranking_cache_wrapper(key)
                    keys.append(key)
                r = sum(float(self.cache.get(key)) for key in keys if self.cache.exists(key))
            else:
                key = f'{d}|{source}|{asn}|{ipversion}'
                self._ranking_cache_wrapper(key)
                r = self.cache.get(key)
        else:
            key = f'{d}|asns|{ipversion}'
            self._ranking_cache_wrapper(key)
            r = self.cache.zscore(key, asn)
        if not r:
            r = 0
        if with_position and not source:
            position = self.cache.zrevrank(key, asn)
            if position is not None:
                position += 1
            to_return['response'] = {'rank': float(r), 'position': position,
                                     'total_known_asns': self.cache.zcard(key)}
        else:
            to_return['response'] = float(r)
        return to_return
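
    # Illustrative result shape: asn_rank(1234, with_position=True) (1234 being any ASN)
    # returns a response of the form {'rank': <float>, 'position': <1-based position or None>,
    # 'total_known_asns': <int>}; without with_position, the response is just the float rank.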

    def get_sources(self, date: Optional[Dates]=None):
        '''Get the sources available for a specific day (default: today).'''
        to_return: Dict[str, Any] = {'meta': {}, 'response': set()}
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        key = f'{d}|sources'
        to_return['response'] = self.storage.smembers(key)
        return to_return

    def get_asn_descriptions(self, asn: int, all_descriptions=False) -> Dict[str, Any]:
        '''Get the description(s) of an ASN. By default, only the most recent one is returned.'''
        to_return: Dict[str, Union[Dict, List, str]] = {
            'meta': {'asn': asn, 'all_descriptions': all_descriptions},
            'response': []
        }
        descriptions = self.asn_meta.hgetall(f'{asn}|descriptions')
        if all_descriptions or not descriptions:
            to_return['response'] = descriptions
        else:
            # The keys sort chronologically: keep the most recent description.
            to_return['response'] = descriptions[sorted(descriptions.keys(), reverse=True)[0]]
        return to_return

    def get_prefix_ips(self, asn: int, prefix: str, date: Optional[Dates]=None,
                       source: Union[list, str]='', ipversion: str='v4'):
        '''Get the IPs reported in a prefix announced by the given ASN, with the source(s) reporting each IP.'''
        to_return: Dict[str, Any] = {
            'meta': {'asn': asn, 'prefix': prefix, 'ipversion': ipversion,
                     'source': source},
            'response': defaultdict(list)
        }
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        if source:
            to_return['meta']['source'] = source
            if isinstance(source, list):
                sources = source
            else:
                sources = [source]
        else:
            sources = self.get_sources(d)['response']
        for source in sources:
            ips = {ip_ts.split('|')[0]
                   for ip_ts in self.storage.smembers(f'{d}|{source}|{asn}|{prefix}')}
            for ip in ips:
                to_return['response'][ip].append(source)
        return to_return
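
    # get_prefix_ips() maps each IP seen on the prefix for that day to the list of
    # sources reporting it, e.g. {'203.0.113.1': ['sourceA', 'sourceB']} (illustrative values).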

    def get_asn_history(self, asn: int, period: int=100, source: Union[list, str]='',
                        ipversion: str='v4', date: Optional[Dates]=None):
        '''Get the rank of an ASN for the last `period` days, oldest date first.'''
        to_return: Dict[str, Any] = {
            'meta': {'asn': asn, 'period': period, 'ipversion': ipversion, 'source': source},
            'response': []
        }
        if date is None:
            python_date: datetime.date = datetime.date.today()
        elif isinstance(date, str):
            python_date = parse(date).date()
        elif isinstance(date, datetime.datetime):
            python_date = date.date()
        else:
            python_date = date
        to_return['meta']['date'] = python_date.isoformat()
        for i in range(period):
            d = python_date - timedelta(days=i)
            rank = self.asn_rank(asn, d, source, ipversion)
            # Default to 0 if asn_rank returned nothing for that day.
            to_return['response'].insert(0, (d.isoformat(), rank.get('response', 0)))
        return to_return
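
    # Illustrative call: get_asn_history(1234, period=30) returns 30 (ISO date, rank)
    # tuples ending on the requested date, oldest first, convenient for plotting a time series.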

    def country_rank(self, country: str, date: Optional[Dates]=None, source: Union[list, str]='',
                     ipversion: str='v4'):
        '''Get the sum of the ranks of all the ASNs routed in a country (via RIPEstat), with the per-ASN details.'''
        to_return: Dict[str, Any] = {
            'meta': {'country': country, 'ipversion': ipversion, 'source': source},
            'response': []
        }
        d = self.__normalize_date(date)
        to_return['meta']['date'] = d
        ripe = StatsRIPE()
        response = ripe.country_asns(country, query_time=d, details=1)
        if (not response.get('data') or not response['data'].get('countries') or not
                response['data']['countries'][0].get('routed')):
            self.logger.warning(f'Invalid response: {response}')
            # No routed ASNs known for that country/day: return an empty ranking.
            to_return['response'] = [0, []]
            return to_return
        routed_asns = re.findall(r"AsnSingle\(([\d]*)\)", response['data']['countries'][0]['routed'])
        ranks = [self.asn_rank(asn, d, source, ipversion)['response'] for asn in routed_asns]
        to_return['response'] = [sum(ranks), zip(routed_asns, ranks)]
        return to_return
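
    # country_rank() responses are [total_rank, (asn, rank) pairs]; the pairs are a zip
    # iterator and can only be consumed once (country_history() below turns them into a list).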

    def country_history(self, country: Union[list, str], period: int=30, source: Union[list, str]='',
                        ipversion: str='v4', date: Optional[Dates]=None):
        '''Get the country rank(s) for the last `period` days, oldest date first.'''
        to_return: Dict[str, Any] = {
            'meta': {'country': country, 'ipversion': ipversion, 'source': source},
            'response': defaultdict(list)
        }
        if date is None:
            python_date: datetime.date = datetime.date.today()
        elif isinstance(date, str):
            python_date = parse(date).date()
        elif isinstance(date, datetime.datetime):
            python_date = date.date()
        else:
            python_date = date
        if isinstance(country, str):
            country = [country]
        for c in country:
            for i in range(period):
                d = python_date - timedelta(days=i)
                rank, details = self.country_rank(c, d, source, ipversion)['response']
                if rank is None:
                    rank = 0
                to_return['response'][c].insert(0, (d.isoformat(), rank, list(details)))
        return to_return
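
    # For each country code, country_history() returns a list of
    # (ISO date, total rank, [(asn, rank), ...]) tuples, oldest entry first.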

    def get_source_config(self):
        pass

    def get_sources_configs(self):
        loaded = []
        for modulepath in get_modules():
            with open(modulepath) as f:
                loaded.append(json.load(f))
        return {f"{config['vendor']}-{config['name']}": config for config in loaded}
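

# Illustrative usage, not part of the module (assumes the Redis instances and the
# configuration that get_config() / get_socket_path() expect are in place):
#
#   bgpranking = BGPRanking()
#   if bgpranking.check_redis_up():
#       top = bgpranking.asns_global_ranking(limit=10)
#       for asn, rank in top['response']:
#           print(asn, rank)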