#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging

from redis import StrictRedis

from .libs.helpers import shutdown_requested, set_running, unset_running, get_socket_path


class DatabaseInsert():
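    """Pull sanitized entries off the 'prepare' queue, enrich them with the
    cached RIS data, and store the result in the ARDB storage backend."""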

    def __init__(self, loglevel: int = logging.DEBUG):
        self.__init_logger(loglevel)
        # ARDB backend storing the aggregated data
        self.ardb_storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True)
        # Queue of sanitized entries waiting to be inserted
        self.redis_sanitized = StrictRedis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True)
        # Cache of RIS lookups, keyed by IP
        self.ris_cache = StrictRedis(unix_socket_path=get_socket_path('ris'), db=0, decode_responses=True)
        self.logger.debug('Starting import')

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def insert(self):
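        """Move batched entries from the sanitized queue into ARDB.

        The keys form the hierarchy <date>|sources -> <date>|<source> ->
        <date>|<source>|<asn> -> <date>|<source>|<asn>|<prefix>.
        Entries without cached RIS data are pushed back onto the queue and
        their IPs are flagged for a RIS lookup.
        """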
        set_running(self.__class__.__name__)
        while True:
            if shutdown_requested():
                break
            # Process the sanitized entries in batches of up to 1000
            uuids = self.redis_sanitized.spop('to_insert', 1000)
            if not uuids:
                break
            # Fetch all the sanitized hashes in a single round-trip
            p = self.redis_sanitized.pipeline(transaction=False)
            for uuid in uuids:
                p.hgetall(uuid)
            sanitized_data = p.execute()

            retry = []
            done = []
            prefix_missing = []
            ardb_pipeline = self.ardb_storage.pipeline(transaction=False)
            for i, uuid in enumerate(uuids):
                data = sanitized_data[i]
                if not data:
                    self.logger.warning(f"No data for UUID {uuid}. This should not happen, but let's move on.")
                    continue
                # Data gathered from the RIS queries:
                # * IP block of the IP -> https://stat.ripe.net/docs/data_api#NetworkInfo
                # * AS number -> https://stat.ripe.net/docs/data_api#NetworkInfo
                # * Full text description of the AS (older name) -> https://stat.ripe.net/docs/data_api#AsOverview
                ris_entry = self.ris_cache.hgetall(data['ip'])
                if not ris_entry:
                    # RIS data not available yet, retry later
                    retry.append(uuid)
                    # In case this IP is missing in the set to process
                    prefix_missing.append(data['ip'])
                    continue
                # Format: <YYYY-MM-DD>|sources -> set([<source>, ...])
                ardb_pipeline.sadd(f"{data['date']}|sources", data['source'])
                # Format: <YYYY-MM-DD>|<source> -> set([<asn>, ...])
                ardb_pipeline.sadd(f"{data['date']}|{data['source']}", ris_entry['asn'])
                # Format: <YYYY-MM-DD>|<source>|<asn> -> set([<prefix>, ...])
                ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{ris_entry['asn']}", ris_entry['prefix'])
                # Format: <YYYY-MM-DD>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...])
                ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{ris_entry['asn']}|{ris_entry['prefix']}",
                                   f"{data['ip']}|{data['datetime']}")
                done.append(uuid)
            ardb_pipeline.execute()
            if prefix_missing:
                self.ris_cache.sadd('for_ris_lookup', *prefix_missing)
            p = self.redis_sanitized.pipeline(transaction=False)
            if done:
                # The entries made it into ARDB, drop the sanitized hashes
                p.delete(*done)
            if retry:
                # Re-queue the entries that are still waiting on RIS data
                p.sadd('to_insert', *retry)
            p.execute()
        unset_running(self.__class__.__name__)
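

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original module: run a single
    # import pass. Assumes the module is executed in its package context
    # (e.g. via `python -m`, since the relative import above requires it)
    # and that the 'storage', 'prepare' and 'ris' sockets resolved by
    # get_socket_path are up.
    logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                        level=logging.INFO)
    DatabaseInsert(loglevel=logging.INFO).insert()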