2018-04-10 21:27:35 +02:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import datetime
import logging
from collections import defaultdict
from datetime import timedelta
from typing import Optional, TypeVar

from dateutil.parser import parse
from redis import StrictRedis

from .libs.exceptions import InvalidDateFormat
from .libs.helpers import get_socket_path
from .libs.statsripe import StatsRIPE
|
2018-04-10 21:27:35 +02:00
|
|
|
|
2018-04-10 23:22:32 +02:00
|
|
|
# Any accepted date representation for the query methods below:
# a datetime, a date, or a string parseable by dateutil.parser.parse.
Dates = TypeVar('Dates', datetime.datetime, datetime.date, str)
|
2018-04-10 21:27:35 +02:00
|
|
|
|
|
|
|
|
2018-04-11 14:55:20 +02:00
|
|
|
class Querying():
    '''Read-only query layer on top of the redis databases maintained by the ranking backend.

    Three databases are used (all over the same unix socket):
      db 0 (storage): raw daily observations and the per-day set of sources.
      db 1 (ranking): pre-computed rankings (sorted sets and plain keys).
      db 2 (asn_meta): ASN metadata such as descriptions.
    '''

    def __init__(self, loglevel: int=logging.DEBUG):
        '''Open the connections to the storage, ranking and metadata databases.'''
        self.__init_logger(loglevel)
        self.storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True)
        self.ranking = StrictRedis(unix_socket_path=get_socket_path('storage'), db=1, decode_responses=True)
        self.asn_meta = StrictRedis(unix_socket_path=get_socket_path('storage'), db=2, decode_responses=True)

    def __init_logger(self, loglevel: int):
        '''Create a class-named logger at the requested level.'''
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def __normalize_date(self, date: Optional[Dates]) -> str:
        '''Normalize any supported date representation to an ISO (YYYY-MM-DD) string.

        ``None`` means "today", resolved at call time. This replaces the former
        ``date=datetime.date.today()`` keyword defaults, which were evaluated once
        at import time and went stale in a long-running process.

        Raises InvalidDateFormat for unparseable strings or unsupported types
        (previously unsupported types silently fell through and returned None,
        producing broken redis keys downstream).
        '''
        if date is None:
            return datetime.date.today().isoformat()
        if isinstance(date, datetime.datetime):
            return date.date().isoformat()
        if isinstance(date, datetime.date):
            return date.isoformat()
        if isinstance(date, str):
            try:
                return parse(date).date().isoformat()
            except ValueError:
                raise InvalidDateFormat('Unable to parse the date. Should be YYYY-MM-DD.')
        raise InvalidDateFormat(f'Unsupported date format: {date!r}. Should be YYYY-MM-DD.')

    def __period_start(self, date: Optional[Dates], period: int) -> datetime.date:
        '''Resolve the most recent day of a `period`-day history window.

        The window is centered so that at least 2/3 of the period lies before the
        reference date and at most 1/3 after; it is clamped so it never extends
        into the future. Also accepts a datetime (previously a datetime crashed
        when compared against ``datetime.date.today()``).
        '''
        if date is None:
            return datetime.date.today()
        if isinstance(date, str):
            date = parse(date).date()
        elif isinstance(date, datetime.datetime):
            date = date.date()
        if date + timedelta(days=period / 3) > datetime.date.today():
            date = datetime.date.today()
        return date

    def asns_global_ranking(self, date: Optional[Dates]=None, source: str='', ipversion: str='v4', limit: int=100):
        '''Aggregated ranking of all the ASNs known in the system, weighted by source.'''
        d = self.__normalize_date(date)
        if source:
            key = f'{d}|{source}|asns|{ipversion}'
        else:
            key = f'{d}|asns|{ipversion}'
        return self.ranking.zrevrange(key, start=0, end=limit, withscores=True)

    def asn_details(self, asn: int, date: Optional[Dates]=None, source: str='', ipversion: str='v4'):
        '''Aggregated ranking of all the prefixes anounced by the given ASN, weighted by source.'''
        d = self.__normalize_date(date)
        if source:
            # NOTE(review): the per-source key carries a trailing '|prefixes' segment
            # while the aggregated key below does not — confirm against the writer side.
            key = f'{d}|{source}|{asn}|{ipversion}|prefixes'
        else:
            key = f'{d}|{asn}|{ipversion}'
        return self.ranking.zrevrange(key, start=0, end=-1, withscores=True)

    def asn_rank(self, asn: int, date: Optional[Dates]=None, source: str='', ipversion: str='v4'):
        '''Get the rank of a single ASN, weighted by source. Returns 0 when unknown.'''
        d = self.__normalize_date(date)
        if source:
            key = f'{d}|{source}|{asn}|{ipversion}'
            r = self.ranking.get(key)
        else:
            key = f'{d}|asns|{ipversion}'
            r = self.ranking.zscore(key, asn)
        if r:
            # GET returns a string (decode_responses=True) while ZSCORE returns a
            # float; cast so both branches — and the sums in country_rank — always
            # see a number.
            return float(r)
        return 0

    def get_sources(self, date: Optional[Dates]=None):
        '''Get the sources available for a specific day (default: today).'''
        d = self.__normalize_date(date)
        key = f'{d}|sources'
        return self.storage.smembers(key)

    def get_asn_descriptions(self, asn: int, all_descriptions=False):
        '''Descriptions of an ASN; by default only the most recent one is returned.'''
        descriptions = self.asn_meta.hgetall(f'{asn}|descriptions')
        if all_descriptions or not descriptions:
            return descriptions
        # max() on the keys is equivalent to sorted(keys, reverse=True)[0];
        # presumably the keys sort chronologically — confirm against the writer side.
        return descriptions[max(descriptions)]

    def get_prefix_ips(self, asn: int, prefix: str, date: Optional[Dates]=None, source: str='', ipversion: str='v4'):
        '''Map each IP seen in the given ASN/prefix on that day to the sources reporting it.'''
        sources = [source] if source else self.get_sources(date)
        prefix_ips = defaultdict(list)
        d = self.__normalize_date(date)
        for src in sources:
            # Entries are stored as 'ip|timestamp'; keep the IP only, deduplicated.
            ips = {ip_ts.split('|')[0]
                   for ip_ts in self.storage.smembers(f'{d}|{src}|{asn}|{prefix}')}
            for ip in ips:
                prefix_ips[ip].append(src)
        return prefix_ips

    def get_asn_history(self, asn: int, period: int=100, source: str='', ipversion: str='v4', date: Optional[Dates]=None):
        '''Daily (ISO date, rank) history of an ASN over `period` days, oldest first.'''
        end = self.__period_start(date, period)
        to_return = []
        # Iterate from the oldest day forward (append beats the former O(n^2) insert(0, ...)).
        for i in reversed(range(period)):
            d = end - timedelta(days=i)
            rank = self.asn_rank(asn, d, source, ipversion)
            if rank is None:
                rank = 0
            to_return.append((d.isoformat(), rank))
        return to_return

    def country_rank(self, country: str, date: Optional[Dates]=None, source: str='', ipversion: str='v4'):
        '''Sum of the ranks of every ASN routed in a country, per RIPE stats.'''
        ripe = StatsRIPE()
        d = self.__normalize_date(date)
        response = ripe.country_asns(country, query_time=d, details=1)
        if (not response.get('data') or not response['data'].get('countries') or not
                response['data']['countries'][0].get('routed')):
            logging.warning(f'Invalid response: {response}')
            # FIXME: return something more informative; callers currently treat None as "no data".
            return
        return sum(self.asn_rank(asn, d, source, ipversion)
                   for asn in response['data']['countries'][0]['routed'])

    def country_history(self, country: str, period: int=30, source: str='', ipversion: str='v4', date: Optional[Dates]=None):
        '''Daily (ISO date, rank) history of a country over `period` days, oldest first.'''
        end = self.__period_start(date, period)
        to_return = []
        for i in reversed(range(period)):
            d = end - timedelta(days=i)
            rank = self.country_rank(country, d, source, ipversion)
            if rank is None:
                rank = 0
            to_return.append((d.isoformat(), rank))
        return to_return