BGP-Ranking/bgpranking/querying.py

215 lines
8.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2018-04-10 21:27:35 +02:00
# -*- coding: utf-8 -*-
from typing import TypeVar, Union
2018-04-10 23:22:32 +02:00
import datetime
2018-05-31 15:48:11 +02:00
from datetime import timedelta
2018-04-10 21:27:35 +02:00
from dateutil.parser import parse
2018-06-01 17:13:56 +02:00
from collections import defaultdict
2018-04-10 21:27:35 +02:00
import logging
import json
2018-04-10 21:27:35 +02:00
from redis import StrictRedis
from .libs.helpers import get_socket_path, get_config_path
from .libs.exceptions import InvalidDateFormat
2018-07-27 14:33:25 +02:00
from .libs.statsripe import StatsRIPE
2018-04-10 21:27:35 +02:00
2018-04-10 23:22:32 +02:00
# Any value the query methods accept as a date.
# NOTE: declared as a constrained TypeVar, but only ever used as a plain
# "one of these types" alias in the signatures below.
Dates = TypeVar('Dates', datetime.datetime, datetime.date, str)


class Querying():
    '''Query interface over the ranking data stored in Redis.

    Four connections are opened by the constructor: `storage` (default db),
    `ranking` (db 1) and `asn_meta` (db 2) on the 'storage' socket, and
    `cache` (db 1) on the 'cache' socket. Ranking keys are copied into the
    cache on demand and every ranking read goes through the cache.

    All `date` parameters accept a `datetime.datetime`, a `datetime.date`, a
    string parseable by `dateutil`, or None, which means "today" and is
    resolved at call time.
    '''

    # How long a ranking key copied into the cache stays there.
    CACHE_TTL_SECONDS = 10 * 60 * 60

    def __init__(self, loglevel: int=logging.DEBUG):
        self.__init_logger(loglevel)
        self.storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True)
        self.ranking = StrictRedis(unix_socket_path=get_socket_path('storage'), db=1, decode_responses=True)
        self.asn_meta = StrictRedis(unix_socket_path=get_socket_path('storage'), db=2, decode_responses=True)
        self.cache = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)

    def __init_logger(self, loglevel: int):
        '''Create the logger, named after the class, with the given level.'''
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(loglevel)

    def __to_date(self, date: Union[Dates, None]) -> datetime.date:
        '''Convert any supported date value to a `datetime.date`.

        None means today. A string is handed to `dateutil.parser.parse`, whose
        ValueError is deliberately not wrapped here (the history methods have
        always let it propagate). Any other type raises InvalidDateFormat.
        '''
        if date is None:
            return datetime.date.today()
        # datetime.datetime is a subclass of datetime.date: test it first.
        if isinstance(date, datetime.datetime):
            return date.date()
        if isinstance(date, datetime.date):
            return date
        if isinstance(date, str):
            return parse(date).date()
        raise InvalidDateFormat(f'Unsupported date type: {type(date).__name__}.')

    def __normalize_date(self, date: Union[Dates, None]) -> str:
        '''Return the date as a YYYY-MM-DD string, as used in the Redis keys.

        Raises InvalidDateFormat if a string cannot be parsed or the type is
        not supported.
        '''
        if isinstance(date, str):
            try:
                return parse(date).date().isoformat()
            except ValueError as e:
                raise InvalidDateFormat('Unable to parse the date. Should be YYYY-MM-DD.') from e
        return self.__to_date(date).isoformat()

    def __history_end_date(self, date: Union[Dates, None], period: int) -> datetime.date:
        '''Return the most recent day of a history window of `period` days.

        This is `date`, unless `date` plus a third of the period lies after
        today, in which case the window ends today.
        '''
        end = self.__to_date(date)
        today = datetime.date.today()
        if end + timedelta(days=period / 3) > today:
            return today
        return end

    def ranking_cache_wrapper(self, key):
        '''Copy `key` from the ranking database into the cache if it is not cached yet.

        Nothing happens when the key is already cached or does not exist in the
        ranking database.
        '''
        if self.cache.exists(key) or not self.ranking.exists(key):
            return
        # NOTE(review): DUMP payloads are binary; confirm the installed redis-py
        # does not try to decode them on a decode_responses=True connection.
        key_dump = self.ranking.dump(key)
        if key_dump is None:
            # The key disappeared between EXISTS and DUMP.
            return
        # Cache for 10 hours. RESTORE expects its TTL in milliseconds.
        self.cache.restore(key, self.CACHE_TTL_SECONDS * 1000, key_dump, replace=True)

    def __cache_sorted_set(self, source: Union[list, str], key_for_source, union_suffix: str,
                           aggregated_key: str) -> str:
        '''Make sure the sorted set matching `source` is cached and return its key.

        - no source: `aggregated_key`
        - one source (str): `key_for_source(source)`
        - list of sources: the per-source sets are cached, then merged with
          ZUNIONSTORE into '<sorted sources joined by |>' + `union_suffix`
        '''
        if not source:
            key = aggregated_key
            self.ranking_cache_wrapper(key)
        elif isinstance(source, list):
            keys = []
            for s in source:
                source_key = key_for_source(s)
                self.ranking_cache_wrapper(source_key)
                keys.append(source_key)
            # union the ranked sets
            key = '|'.join(sorted(source)) + union_suffix
            if not self.cache.exists(key):
                self.cache.zunionstore(key, keys)
                # ZUNIONSTORE creates the key without a TTL: expire it like the
                # other cached keys so it cannot go stale forever.
                self.cache.expire(key, self.CACHE_TTL_SECONDS)
        else:
            key = key_for_source(source)
            self.ranking_cache_wrapper(key)
        return key

    def asns_global_ranking(self, date: Union[Dates, None]=None, source: Union[list, str]='',
                            ipversion: str='v4', limit: int=100):
        '''Aggregated ranking of all the ASNs known in the system, weighted by source.

        Returns at most `limit` (ASN, score) pairs, highest score first. A
        negative `limit` is passed to Redis unchanged as the stop index
        (-1 returns everything).
        '''
        d = self.__normalize_date(date)
        key = self.__cache_sorted_set(
            source,
            key_for_source=lambda s: f'{d}|{s}|asns|{ipversion}',
            union_suffix=f'|{d}|asns|{ipversion}',
            aggregated_key=f'{d}|asns|{ipversion}')
        if limit == 0:
            return []
        # The stop index of ZREVRANGE is inclusive.
        end = limit - 1 if limit > 0 else limit
        return self.cache.zrevrange(key, start=0, end=end, withscores=True)

    def asn_details(self, asn: int, date: Union[Dates, None]=None, source: Union[list, str]='',
                    ipversion: str='v4'):
        '''Aggregated ranking of all the prefixes announced by the given ASN, weighted by source.

        Returns every (prefix, score) pair, highest score first.
        '''
        d = self.__normalize_date(date)
        key = self.__cache_sorted_set(
            source,
            key_for_source=lambda s: f'{d}|{s}|{asn}|{ipversion}|prefixes',
            union_suffix=f'|{d}|{asn}|{ipversion}',
            aggregated_key=f'{d}|{asn}|{ipversion}')
        return self.cache.zrevrange(key, start=0, end=-1, withscores=True)

    def asn_rank(self, asn: int, date: Union[Dates, None]=None, source: Union[list, str]='',
                 ipversion: str='v4'):
        '''Get the rank of a single ASN, weighted by source.

        With a list of sources the per-source ranks are summed. Returns a
        float, or 0 when no (non-zero) rank is known.
        '''
        d = self.__normalize_date(date)
        if not source:
            key = f'{d}|asns|{ipversion}'
            self.ranking_cache_wrapper(key)
            r = self.cache.zscore(key, asn)
        elif isinstance(source, list):
            ranks = []
            for s in source:
                key = f'{d}|{s}|{asn}|{ipversion}'
                self.ranking_cache_wrapper(key)
                # A single GET instead of EXISTS + GET: the key may expire in between.
                value = self.cache.get(key)
                if value is not None:
                    ranks.append(float(value))
            r = sum(ranks)
        else:
            key = f'{d}|{source}|{asn}|{ipversion}'
            self.ranking_cache_wrapper(key)
            r = self.cache.get(key)
        if r:
            return float(r)
        return 0

    def get_sources(self, date: Union[Dates, None]=None):
        '''Get the sources available for a specific day (default: today).'''
        d = self.__normalize_date(date)
        return self.storage.smembers(f'{d}|sources')

    def get_asn_descriptions(self, asn: int, all_descriptions=False):
        '''Get the description(s) stored for an ASN.

        Returns the whole mapping when `all_descriptions` is set or when there
        is none; otherwise only the value stored under the greatest key (keys
        are compared as strings).
        '''
        descriptions = self.asn_meta.hgetall(f'{asn}|descriptions')
        if all_descriptions or not descriptions:
            return descriptions
        return descriptions[max(descriptions)]

    def get_prefix_ips(self, asn: int, prefix: str, date: Union[Dates, None]=None,
                       source: Union[list, str]='', ipversion: str='v4'):
        '''Map every IP stored for the prefix of an ASN to the sources that reported it.

        Without `source`, every source known for the day is used. `ipversion`
        is currently unused. Returns a defaultdict(list): IP -> list of sources.
        '''
        if not source:
            sources = self.get_sources(date)
        elif isinstance(source, list):
            sources = source
        else:
            sources = [source]
        prefix_ips = defaultdict(list)
        d = self.__normalize_date(date)
        for s in sources:
            # Members look like '<ip>|<rest>': keep each IP once per source.
            ips = {ip_ts.split('|')[0] for ip_ts in self.storage.smembers(f'{d}|{s}|{asn}|{prefix}')}
            for ip in ips:
                prefix_ips[ip].append(s)
        return prefix_ips

    def get_asn_history(self, asn: int, period: int=100, source: Union[list, str]='',
                        ipversion: str='v4', date: Union[Dates, None]=None):
        '''Daily rank of an ASN over `period` days.

        The window ends on `date`, or today if `date` is less than a third of
        the period in the past. Returns (YYYY-MM-DD, rank) tuples, oldest first.
        '''
        end = self.__history_end_date(date, period)
        days = [end - timedelta(days=i) for i in range(period - 1, -1, -1)]
        return [(d.isoformat(), self.asn_rank(asn, d, source, ipversion)) for d in days]

    def country_rank(self, country: str, date: Union[Dates, None]=None, source: Union[list, str]='',
                     ipversion: str='v4'):
        '''Rank of a country: the sum of the ranks of the ASNs RIPEstat reports as routed.

        Returns (sum of the ranks, list of (ASN, rank) tuples).
        '''
        ripe = StatsRIPE()
        d = self.__normalize_date(date)
        response = ripe.country_asns(country, query_time=d, details=1)
        data = response.get('data') or {}
        countries = data.get('countries') or [{}]
        routed_asns = countries[0].get('routed')
        if not routed_asns:
            self.logger.warning('Invalid response: %s', response)
            # FIXME: return something
            return 0, [(0, 0)]
        ranks = [self.asn_rank(asn, d, source, ipversion) for asn in routed_asns]
        # A list, like the fallback above, so the details can be read more than once.
        return sum(ranks), list(zip(routed_asns, ranks))

    def country_history(self, country: Union[list, str], period: int=30, source: Union[list, str]='',
                        ipversion: str='v4', date: Union[Dates, None]=None):
        '''Daily rank of one or more countries over `period` days.

        The window ends on `date`, or today if `date` is less than a third of
        the period in the past. Returns a dict: country -> list of
        (YYYY-MM-DD, rank, details) tuples, oldest first.
        '''
        end = self.__history_end_date(date, period)
        countries = [country] if isinstance(country, str) else country
        days = [end - timedelta(days=i) for i in range(period - 1, -1, -1)]
        to_return = {}
        for c in countries:
            history = []
            for d in days:
                rank, details = self.country_rank(c, d, source, ipversion)
                history.append((d.isoformat(), rank, list(details)))
            to_return[c] = history
        return to_return

    def get_source_config(self):
        '''Placeholder, not implemented: returns None.'''
        pass

    def get_sources_configs(self):
        '''Load every '*.json' file of the 'modules' configuration directory.

        Returns a dict: '<vendor>-<name>' -> parsed configuration.
        '''
        config_dir = get_config_path() / 'modules'
        configs = {}
        for modulepath in config_dir.glob('*.json'):
            with open(modulepath, encoding='utf-8') as f:
                config = json.load(f)
            configs['{}-{}'.format(config['vendor'], config['name'])] = config
        return configs