#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging

from redis import StrictRedis

from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files, get_ipasn, sanity_check_ipasn

from datetime import datetime, date, timedelta

from ipaddress import ip_network

from pathlib import Path

|
class Ranking():
    '''Daily ranking computation.

    Aggregates the per-day / per-source / per-ASN / per-prefix entries found in
    the storage database (redis DB 0) into ranking scores written to the
    ranking database (redis DB 1), normalized by each ASN's announced address
    space as reported by IPASN History.
    '''

    def __init__(self, config_dir: Path=None, loglevel: int=logging.DEBUG):
        self.__init_logger(loglevel)
        # DB 0 holds the raw daily entries, DB 1 the computed rankings
        # (same unix socket, different database index).
        self.storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True)
        self.ranking = StrictRedis(unix_socket_path=get_socket_path('storage'), db=1, decode_responses=True)
        self.ipasn = get_ipasn()
        self.config_dir = config_dir

    def __init_logger(self, loglevel):
        # Per-class logger; level is configurable through the constructor.
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def rank_a_day(self, day: str, config_files: dict):
        '''Compute and store all the rankings for a single day.

        :param day: ISO formatted day (YYYY-MM-DD) to rank.
        :param config_files: per-source configuration; each source entry must
                             provide an 'impact' multiplier.
        '''
        asns_aggregation_key_v4 = f'{day}|asns|v4'
        asns_aggregation_key_v6 = f'{day}|asns|v6'
        # The aggregation sorted-sets are rebuilt from scratch: collect every
        # key to wipe just before pushing the freshly computed values.
        to_delete = {asns_aggregation_key_v4, asns_aggregation_key_v6}
        r_pipeline = self.ranking.pipeline()
        # ASN metadata does not depend on the source: cache the IPASN History
        # responses so each ASN is queried at most once per day.
        cached_meta = {}
        for source in self.storage.smembers(f'{day}|sources'):
            self.logger.info(f'{day} - Ranking source: {source}')
            source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
            source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
            to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
            # Loop-invariant for everything below: the source's impact factor.
            impact = config_files[source]['impact']
            for asn in self.storage.smembers(f'{day}|{source}'):
                prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
                prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
                to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
                if asn == '0':
                    # Default ASN when no matches. Probably spoofed.
                    continue
                self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
                asn_rank_v4 = 0.0
                asn_rank_v6 = 0.0
                for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
                    if prefix == 'None':
                        # This should not happen and requires a DB cleanup.
                        self.logger.critical(f'Fucked up prefix in "{day}|{source}|{asn}"')
                        continue
                    # Entries are stored as 'ip|timestamp'; only distinct IPs count.
                    ips = {ip_ts.split('|')[0]
                           for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')}
                    py_prefix = ip_network(prefix)
                    # Fraction of the prefix's address space seen that day.
                    prefix_rank = float(len(ips)) / py_prefix.num_addresses
                    r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', {prefix: prefix_rank})
                    if py_prefix.version == 4:
                        asn_rank_v4 += len(ips) * impact
                        r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix_rank * impact, prefix)
                    else:
                        asn_rank_v6 += len(ips) * impact
                        r_pipeline.zincrby(prefixes_aggregation_key_v6, prefix_rank * impact, prefix)
                if asn in cached_meta:
                    v4info = cached_meta[asn]['v4']
                    v6info = cached_meta[asn]['v6']
                else:
                    v4info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v4', date=day)
                    v6info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v6', date=day)
                    cached_meta[asn] = {'v4': v4info, 'v6': v6info}
                # The response is keyed by the date IPASN History actually has
                # data for (may differ from `day`); assumes the response holds
                # at least one entry — TODO confirm with the IPASN History API.
                ipasnhistory_date_v4 = next(iter(v4info['response']))
                v4count = v4info['response'][ipasnhistory_date_v4][asn]['ipcount']
                ipasnhistory_date_v6 = next(iter(v6info['response']))
                v6count = v6info['response'][ipasnhistory_date_v6][asn]['ipcount']
                if v4count:
                    # Normalize by the total number of IPv4 addresses the ASN announces.
                    asn_rank_v4 /= float(v4count)
                    if asn_rank_v4:
                        r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
                        r_pipeline.zincrby(asns_aggregation_key_v4, asn_rank_v4, asn)
                        r_pipeline.zadd(source_aggregation_key_v4, {asn: asn_rank_v4})
                if v6count:
                    # Same normalization for the IPv6 side.
                    asn_rank_v6 /= float(v6count)
                    if asn_rank_v6:
                        r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
                        r_pipeline.zincrby(asns_aggregation_key_v6, asn_rank_v6, asn)
                        r_pipeline.zadd(source_aggregation_key_v6, {asn: asn_rank_v6})
        # Wipe the stale aggregations first, then flush the new values in one go.
        self.ranking.delete(*to_delete)
        r_pipeline.execute()

    def compute(self):
        '''Rank today's data (and yesterday's too, when run before noon).'''
        config_files = load_config_files(self.config_dir)
        ready, message = sanity_check_ipasn(self.ipasn)
        if not ready:
            # Try again later.
            self.logger.warning(message)
            return
        self.logger.debug(message)

        self.logger.info('Start ranking')
        set_running(self.__class__.__name__)
        today = date.today()
        now = datetime.now()
        today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
        if now < today12am:
            # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
            self.rank_a_day((today - timedelta(days=1)).isoformat(), config_files)
        self.rank_a_day(today.isoformat(), config_files)
        unset_running(self.__class__.__name__)
        self.logger.info('Ranking done.')
|