From 2cdab551031485b4a9594805f2a35e0026d4486f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Fri, 13 Apr 2018 18:02:44 +0200 Subject: [PATCH] new: Add ASN descriptions --- bgpranking/asn_descriptions.py | 66 ++++++++++++++++++++++++++++++++ bgpranking/parser.py | 30 ++++++++------- bgpranking/querying.py | 6 +++ bin/asn_descriptions.py | 29 ++++++++++++++ setup.py | 2 +- website/web/__init__.py | 4 +- website/web/templates/index.html | 4 +- 7 files changed, 125 insertions(+), 16 deletions(-) create mode 100644 bgpranking/asn_descriptions.py create mode 100755 bin/asn_descriptions.py diff --git a/bgpranking/asn_descriptions.py b/bgpranking/asn_descriptions.py new file mode 100644 index 0000000..274ac31 --- /dev/null +++ b/bgpranking/asn_descriptions.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import logging +from pathlib import Path +import requests +from redis import StrictRedis +from dateutil.parser import parse +import re + +from .libs.helpers import set_running, unset_running, get_socket_path, safe_create_dir + + +class ASNDescriptions(): + + def __init__(self, storage_directory: Path, loglevel: int=logging.DEBUG): + self.__init_logger(loglevel) + self.asn_meta = StrictRedis(unix_socket_path=get_socket_path('storage'), db=2, decode_responses=True) + self.logger.debug('Starting ASN History') + self.directory = storage_directory / 'ans_descriptions' + safe_create_dir(self.directory) + self.archives = self.directory / 'archive' + safe_create_dir(self.archives) + self.url = 'http://www.cidr-report.org/as2.0/autnums.html' + + def __init_logger(self, loglevel): + self.logger = logging.getLogger(f'{self.__class__.__name__}') + self.logger.setLevel(loglevel) + + def __update_available(self): + r = requests.head(self.url) + current_last_modified = parse(r.headers['Last-Modified']) + if not self.asn_meta.exists('ans_description_last_update'): + return True + last_update = parse(self.asn_meta.get('ans_description_last_update')) + if last_update < current_last_modified: + return True + return False + + def load_descriptions(self): + if not self.__update_available(): + self.logger.debug('No new file to import.') + return + set_running(self.__class__.__name__) + self.logger.info('Importing new ASN descriptions.') + r = requests.get(self.url) + last_modified = parse(r.headers['Last-Modified']).isoformat() + p = self.asn_meta.pipeline() + new_asn = 0 + new_description = 0 + for asn, descr in re.findall('as=AS(.*)&.* (.*)\n', r.text): + existing_descriptions = self.asn_meta.hgetall(f'{asn}|descriptions') + if not existing_descriptions: + self.logger.debug(f'New ASN: {asn} - {descr}') + p.hset(f'{asn}|descriptions', last_modified, descr) + new_asn += 1 + else: + last_descr = sorted(existing_descriptions.keys(), reverse=True)[0] + if descr != existing_descriptions[last_descr]: + self.logger.debug(f'New description for {asn}: {existing_descriptions[last_descr]} -> {descr}') + p.hset(f'{asn}|descriptions', last_modified, descr) + new_description += 1 + p.set('ans_description_last_update', last_modified) + p.execute() + self.logger.info(f'Done with import. New ASNs: {new_asn}, new descriptions: {new_description}') + unset_running(self.__class__.__name__) diff --git a/bgpranking/parser.py b/bgpranking/parser.py index 6f19da7..70b49d4 100644 --- a/bgpranking/parser.py +++ b/bgpranking/parser.py @@ -57,19 +57,23 @@ class RawFilesParser(): def parse_raw_files(self): set_running(f'{self.__class__.__name__}-{self.source}') - for filepath in self.files_to_parse: - self.logger.debug('Parsing {}, {} to go.'.format(filepath, len(self.files_to_parse) - 1)) - with open(filepath, 'rb') as f: - to_parse = BytesIO(f.read()) - p = self.redis_intake.pipeline() - for ip in self.parse_raw_file(to_parse): - uuid = uuid4() - p.hmset(uuid, {'ip': ip, 'source': self.source, - 'datetime': self.datetime.isoformat()}) - p.sadd('intake', uuid) - p.execute() - self._archive(filepath) - unset_running(f'{self.__class__.__name__}-{self.source}') + try: + for filepath in self.files_to_parse: + self.logger.debug('Parsing {}, {} to go.'.format(filepath, len(self.files_to_parse) - 1)) + with open(filepath, 'rb') as f: + to_parse = BytesIO(f.read()) + p = self.redis_intake.pipeline() + for ip in self.parse_raw_file(to_parse): + uuid = uuid4() + p.hmset(uuid, {'ip': ip, 'source': self.source, + 'datetime': self.datetime.isoformat()}) + p.sadd('intake', uuid) + p.execute() + self._archive(filepath) + except Exception as e: + self.logger.exception("That didn't go well") + finally: + unset_running(f'{self.__class__.__name__}-{self.source}') def _archive(self, filepath: Path): '''After processing, move file to the archive directory''' diff --git a/bgpranking/querying.py b/bgpranking/querying.py index 24d02c6..ad5f6ec 100644 --- a/bgpranking/querying.py +++ b/bgpranking/querying.py @@ -69,3 +69,9 @@ class Querying(): d = self.__normalize_date(date) key = f'{d}|sources' return self.storage.smembers(key) + + def get_asn_descriptions(self, asn: int, all_descriptions=False): + descriptions = self.asn_meta.hgetall(f'{asn}|descriptions') + if all_descriptions or not descriptions: + return descriptions + return descriptions[sorted(descriptions.keys(), reverse=True)[0]] diff --git a/bin/asn_descriptions.py b/bin/asn_descriptions.py new file mode 100755 index 0000000..19436e1 --- /dev/null +++ b/bin/asn_descriptions.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import logging +from pathlib import Path + +from bgpranking.abstractmanager import AbstractManager +from bgpranking.asn_descriptions import ASNDescriptions +from bgpranking.libs.helpers import get_homedir + +logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', + level=logging.INFO, datefmt='%I:%M:%S') + + +class ASNDescriptionsManager(AbstractManager): + + def __init__(self, storage_directory: Path=None, loglevel: int=logging.DEBUG): + super().__init__(loglevel) + if not storage_directory: + storage_directory = get_homedir() / 'rawdata' + self.asn_descr = ASNDescriptions(storage_directory, loglevel) + + def _to_run_forever(self): + self.asn_descr.load_descriptions() + + +if __name__ == '__main__': + asnd_manager = ASNDescriptionsManager() + asnd_manager.run(sleep_in_sec=3600) diff --git a/setup.py b/setup.py index b0c8bae..2b3db44 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ setup( packages=['bgpranking'], scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py', 'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', - 'bin/monitor.py', 'bin/ranking.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'], + 'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'], classifiers=[ 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', 'Development Status :: 3 - Alpha', diff --git a/website/web/__init__.py b/website/web/__init__.py index 291aa12..4f543fb 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -49,7 +49,9 @@ def index(): sources = q.get_sources(date=session['date']) session.pop('asn', None) ranks = q.asns_global_ranking(limit=-1, **session) - return render_template('index.html', ranks=ranks, sources=sources, **session) + descriptions = [q.get_asn_descriptions(int(asn)) for asn, rank in ranks] + r = zip(ranks, descriptions) + return render_template('index.html', ranks=r, sources=sources, **session) @app.route('/asn', methods=['GET', 'POST']) diff --git a/website/web/templates/index.html b/website/web/templates/index.html index f72c1d6..270d9dd 100644 --- a/website/web/templates/index.html +++ b/website/web/templates/index.html @@ -39,11 +39,13 @@ ASN Rank + Description - {% for asn, rank in ranks %} + {% for (asn, rank), description in ranks %} {{ asn }} {{ rank }} + {{ description }} {% endfor %}