From 5ce2f91430d2f5701f3dbb735ef91eec19df4a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 14 Nov 2018 17:07:30 +0100 Subject: [PATCH] new: Uses IPASN History for the routing information. --- bgpranking/abstractmanager.py | 2 +- bgpranking/config/bgpranking.json | 3 + bgpranking/dbinsert.py | 56 +- bgpranking/libs/exceptions.py | 12 + bgpranking/libs/helpers.py | 41 +- bgpranking/monitor.py | 31 +- bgpranking/prefixdb.py | 97 -- bgpranking/querying.py | 18 +- bgpranking/ranking.py | 6 +- bgpranking/risfetcher.py | 82 -- bgpranking/sanitizer.py | 38 +- bin/loadprefixes.py | 30 - bin/monitor.py | 5 +- bin/rislookup.py | 25 - bin/run_backend.py | 4 +- bin/shutdown.py | 2 +- bin/start.py | 2 - cache/{prefixes.conf => cache.conf} | 4 +- cache/ris.conf | 1317 --------------------------- cache/run_redis.sh | 3 +- cache/shutdown_redis.sh | 3 +- old/StatsRipeText.py | 18 - old/initranking_RIPE.py | 79 -- old/risfetcher_RIPE.py | 61 -- requirements.txt | 3 + setup.py | 2 +- 26 files changed, 154 insertions(+), 1790 deletions(-) create mode 100644 bgpranking/config/bgpranking.json delete mode 100644 bgpranking/prefixdb.py delete mode 100644 bgpranking/risfetcher.py delete mode 100755 bin/loadprefixes.py delete mode 100755 bin/rislookup.py rename cache/{prefixes.conf => cache.conf} (99%) delete mode 100644 cache/ris.conf delete mode 100644 old/StatsRipeText.py delete mode 100644 old/initranking_RIPE.py delete mode 100644 old/risfetcher_RIPE.py diff --git a/bgpranking/abstractmanager.py b/bgpranking/abstractmanager.py index 3d40948..2840eb2 100644 --- a/bgpranking/abstractmanager.py +++ b/bgpranking/abstractmanager.py @@ -27,7 +27,7 @@ class AbstractManager(ABC): try: self._to_run_forever() except Exception: - self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') + self.logger.exception(f'Something went wrong in {self.__class__.__name__}.') if not long_sleep(sleep_in_sec): break self.logger.info(f'Shutting down {self.__class__.__name__}') diff --git a/bgpranking/config/bgpranking.json b/bgpranking/config/bgpranking.json new file mode 100644 index 0000000..83f1d39 --- /dev/null +++ b/bgpranking/config/bgpranking.json @@ -0,0 +1,3 @@ +{ + "ipasnhistory_url": "http://127.0.0.1:5176/" +} diff --git a/bgpranking/dbinsert.py b/bgpranking/dbinsert.py index 12350ff..08835b8 100644 --- a/bgpranking/dbinsert.py +++ b/bgpranking/dbinsert.py @@ -3,7 +3,7 @@ import logging from redis import StrictRedis -from .libs.helpers import shutdown_requested, set_running, unset_running, get_socket_path +from .libs.helpers import shutdown_requested, set_running, unset_running, get_socket_path, get_ipasn, sanity_check_ipasn class DatabaseInsert(): @@ -12,7 +12,7 @@ class DatabaseInsert(): self.__init_logger(loglevel) self.ardb_storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True) self.redis_sanitized = StrictRedis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) - self.ris_cache = StrictRedis(unix_socket_path=get_socket_path('ris'), db=0, decode_responses=True) + self.ipasn = get_ipasn() self.logger.debug('Starting import') def __init_logger(self, loglevel): @@ -20,52 +20,70 @@ class DatabaseInsert(): self.logger.setLevel(loglevel) def insert(self): + ready, message = sanity_check_ipasn(self.ipasn) + if not ready: + # Try again later. 
+ self.logger.warning(message) + return + self.logger.debug(message) + set_running(self.__class__.__name__) while True: if shutdown_requested(): break - uuids = self.redis_sanitized.spop('to_insert', 1000) + uuids = self.redis_sanitized.spop('to_insert', 100) if not uuids: break p = self.redis_sanitized.pipeline(transaction=False) [p.hgetall(uuid) for uuid in uuids] sanitized_data = p.execute() + for_query = [] + for i, uuid in enumerate(uuids): + data = sanitized_data[i] + if not data: + self.logger.warning(f"No data for UUID {uuid}. This should not happen, but let's move on.") + continue + for_query.append({'ip': data['ip'], 'address_family': data['address_family'], 'source': 'caida', + 'date': data['datetime'], 'precision_delta': {'days': 3}}) + responses = self.ipasn.mass_query(for_query) + retry = [] done = [] - prefix_missing = [] ardb_pipeline = self.ardb_storage.pipeline(transaction=False) for i, uuid in enumerate(uuids): data = sanitized_data[i] if not data: self.logger.warning(f"No data for UUID {uuid}. This should not happen, but let's move on.") continue - # Data gathered from the RIS queries: - # * IP Block of the IP -> https://stat.ripe.net/docs/data_api#NetworkInfo - # * AS number -> https://stat.ripe.net/docs/data_api#NetworkInfo - # * Full text description of the AS (older name) -> https://stat.ripe.net/docs/data_api#AsOverview - ris_entry = self.ris_cache.hgetall(data['ip']) - if not ris_entry: - # RIS data not available yet, retry later - retry.append(uuid) - # In case this IP is missing in the set to process - prefix_missing.append(data['ip']) + routing_info = responses['responses'][i][0] # our queries are for a single date, not a range + # Data gathered from IPASN History: + # * IP Block of the IP + # * AS number + if 'error' in routing_info: + self.logger.warning(f"Unable to find routing information for {data['ip']} - {data['datetime']}: {routing_info['error']}") continue + # Single-date query: take the only entry in the response + datetime_routing = list(routing_info.keys())[0] + entry = routing_info[datetime_routing] + if not entry: + # Routing info is missing; try again later. + retry.append(uuid) + continue + # Format: <YYYY-MM-DD>|sources -> set([<source>, ...]) ardb_pipeline.sadd(f"{data['date']}|sources", data['source']) # Format: <YYYY-MM-DD>|<source> -> set([<asn>, ...]) - ardb_pipeline.sadd(f"{data['date']}|{data['source']}", ris_entry['asn']) + ardb_pipeline.sadd(f"{data['date']}|{data['source']}", entry['asn']) # Format: <YYYY-MM-DD>|<source>|<asn> -> set([<prefix>, ...]) - ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{ris_entry['asn']}", ris_entry['prefix']) + ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}", entry['prefix']) # Format: <YYYY-MM-DD>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...]) - ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{ris_entry['asn']}|{ris_entry['prefix']}", + ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}|{entry['prefix']}", f"{data['ip']}|{data['datetime']}") done.append(uuid) ardb_pipeline.execute() - if prefix_missing: - self.ris_cache.sadd('for_ris_lookup', *prefix_missing) p = self.redis_sanitized.pipeline(transaction=False) if done: p.delete(*done)
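As a reference for the new lookup flow above, here is a minimal standalone sketch of one bulk round-trip against IPASN History. It mirrors the payload built in insert() and the response handling shown in this hunk; the URL is the default from bgpranking/config/bgpranking.json, the sample IP and date are illustrative, and the response shape is inferred from this patch rather than from pyipasnhistory documentation:

    # Sketch of the bulk lookup DatabaseInsert now performs.
    from pyipasnhistory import IPASNHistory

    ipasn = IPASNHistory('http://127.0.0.1:5176/')
    for_query = [{'ip': '198.51.100.7', 'address_family': 'v4', 'source': 'caida',
                  'date': '2018-11-14T16:07:30', 'precision_delta': {'days': 3}}]
    responses = ipasn.mass_query(for_query)
    # One answer per query; a single-date query comes back keyed by date.
    routing_info = responses['responses'][0][0]
    if 'error' not in routing_info:
        date_key = list(routing_info.keys())[0]
        entry = routing_info[date_key]  # expected to carry 'asn' and 'prefix'
        print(entry['asn'], entry['prefix'])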
diff --git a/bgpranking/libs/exceptions.py b/bgpranking/libs/exceptions.py index 222b21c..68805f1 100644 --- a/bgpranking/libs/exceptions.py +++ b/bgpranking/libs/exceptions.py @@ -24,3 +24,15 @@ class InvalidDateFormat(BGPRankingException): pass + + +class MissingConfigFile(BGPRankingException): + pass + + +class MissingConfigEntry(BGPRankingException): + pass + + +class ThirdPartyUnreachable(BGPRankingException): + pass diff --git a/bgpranking/libs/helpers.py b/bgpranking/libs/helpers.py index 89a54e4..2ebaea6 100644 --- a/bgpranking/libs/helpers.py +++ b/bgpranking/libs/helpers.py @@ -4,12 +4,13 @@ import os import sys from pathlib import Path -from .exceptions import CreateDirectoryException, MissingEnv +from .exceptions import CreateDirectoryException, MissingEnv, MissingConfigFile, MissingConfigEntry, ThirdPartyUnreachable from redis import StrictRedis from redis.exceptions import ConnectionError from datetime import datetime, timedelta import time import json +from pyipasnhistory import IPASNHistory def load_config_files(config_dir: Path=None) -> dict: @@ -54,24 +55,23 @@ def safe_create_dir(to_create: Path): def set_running(name: str): - r = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=1, decode_responses=True) + r = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) r.hset('running', name, 1) def unset_running(name: str): - r = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=1, decode_responses=True) + r = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) r.hdel('running', name) def is_running(): - r = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=1, decode_responses=True) + r = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) return r.hgetall('running') def get_socket_path(name: str): mapping = { - 'ris': Path('cache', 'ris.sock'), - 'prefixes': Path('cache', 'prefixes.sock'), + 'cache': Path('cache', 'cache.sock'), 'storage': Path('storage', 'storage.sock'), 'intake': Path('temp', 'intake.sock'), 'prepare': Path('temp', 'prepare.sock'), @@ -79,6 +79,33 @@ return str(get_homedir() / mapping[name]) +def get_ipasn(): + general_config_file = get_config_path() / 'bgpranking.json' + if not general_config_file.exists(): + raise MissingConfigFile(f'The general configuration file ({general_config_file}) does not exist.') + with open(general_config_file) as f: + config = json.load(f) + if 'ipasnhistory_url' not in
config: + raise MissingConfigEntry(f'"ipasnhistory_url" is missing in {general_config_file}.') + ipasn = IPASNHistory(config['ipasnhistory_url']) + if not ipasn.is_up: + raise ThirdPartyUnreachable(f"Unable to reach IPASNHistory on {config['ipasnhistory_url']}") + return ipasn + + +def sanity_check_ipasn(ipasn): + meta = ipasn.meta() + if 'error' in meta: + raise ThirdPartyUnreachable(f'IP ASN History has a problem: {meta["error"]}') + + v4_percent = meta['cached_dates']['caida']['v4']['percent'] + v6_percent = meta['cached_dates']['caida']['v6']['percent'] + if v4_percent < 90 or v6_percent < 90: # (this way it works if we only load 10 days) + # Try again later. + return False, f"IP ASN History is not ready: v4 {v4_percent}% / v6 {v6_percent}% loaded" + return True, f"IP ASN History is ready: v4 {v4_percent}% / v6 {v6_percent}% loaded" + + def check_running(name: str): socket_path = get_socket_path(name) try: @@ -90,7 +117,7 @@ def shutdown_requested(): try: - r = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=1, decode_responses=True) + r = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) return r.exists('shutdown') except ConnectionRefusedError: return True diff --git a/bgpranking/monitor.py b/bgpranking/monitor.py index 64ddfaa..c6e7c79 100644 --- a/bgpranking/monitor.py +++ b/bgpranking/monitor.py @@ -1,8 +1,9 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import json from redis import StrictRedis -from .libs.helpers import get_socket_path +from .libs.helpers import get_socket_path, get_ipasn class Monitor(): @@ -10,30 +11,12 @@ def __init__(self): self.intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True) self.sanitize = StrictRedis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) - self.ris_cache = StrictRedis(unix_socket_path=get_socket_path('ris'), db=0, decode_responses=True) - self.prefix_cache = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=0, decode_responses=True) - self.running = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=1, decode_responses=True) - self.storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True) - - def get_runinng(self): - return self.running.hgetall('running') - - def info_prefix_cache(self): - to_return = {'IPv6 Dump': '', 'IPv4 Dump': '', 'Number ASNs': 0} - if self.prefix_cache.exists('ready'): - v6_dump = self.prefix_cache.get('current|v6') - v4_dump = self.prefix_cache.get('current|v4') - number_as = self.prefix_cache.scard('asns') - to_return['IPv6 Dump'] = v6_dump - to_return['IPv4 Dump'] = v4_dump - to_return['Number ASNs'] = number_as - return to_return + self.cache = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) + self.ipasn = get_ipasn() def get_values(self): ips_in_intake = self.intake.scard('intake') - waiting_for_ris_lookup = self.ris_cache.scard('for_ris_lookup') ready_to_insert = self.sanitize.scard('to_insert') - prefix_db_ready = self.prefix_cache.exists('ready') - return {'Non-parsed IPs': ips_in_intake, 'Parsed IPs': ready_to_insert, - 'Awaiting prefix lookup': waiting_for_ris_lookup, - 'Prefix database ready': prefix_db_ready} + return json.dumps({'Non-parsed IPs': ips_in_intake, 'Parsed IPs': ready_to_insert, + 'IPASN History': self.ipasn.meta(), 'running': self.cache.hgetall('running')}, + indent=2)
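Together, get_ipasn() and sanity_check_ipasn() define the startup contract every worker now follows. A minimal sketch of the intended call pattern, mirroring how DatabaseInsert.insert() and Sanitizer.sanitize() use them:

    # get_ipasn() raises MissingConfigFile, MissingConfigEntry or
    # ThirdPartyUnreachable if the config or the service is unavailable.
    from bgpranking.libs.helpers import get_ipasn, sanity_check_ipasn

    ipasn = get_ipasn()
    ready, message = sanity_check_ipasn(ipasn)
    if not ready:
        # Fewer than 90% of the CAIDA dates are cached: back off and retry.
        print(message)
    else:
        print(message)  # safe to start querying

diff --git a/bgpranking/prefixdb.py b/bgpranking/prefixdb.py deleted file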
mode 100644 index 9d83d95..0000000 --- a/bgpranking/prefixdb.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import logging -from redis import StrictRedis -from ipaddress import ip_network -import requests -import gzip -from io import BytesIO -from collections import defaultdict -import re -import time -from .libs.helpers import set_running, unset_running, get_socket_path -from dateutil.parser import parse - -# Dataset source: Routeviews Prefix to AS mappings Dataset for IPv4 and IPv6 -# http://www.caida.org/data/routing/routeviews-prefix2as.xml - - -class PrefixDatabase(): - - def __init__(self, loglevel: int=logging.DEBUG): - self.__init_logger(loglevel) - self.prefix_cache = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=0, decode_responses=True) - self.asn_meta = StrictRedis(unix_socket_path=get_socket_path('storage'), db=2, decode_responses=True) - self.ipv6_url = 'http://data.caida.org/datasets/routing/routeviews6-prefix2as/{}' - self.ipv4_url = 'http://data.caida.org/datasets/routing/routeviews-prefix2as/{}' - - def __init_logger(self, loglevel): - self.logger = logging.getLogger(f'{self.__class__.__name__}') - self.logger.setLevel(loglevel) - - def update_required(self): - v4_is_new, v4_path = self._has_new('v4', self.ipv4_url) - v6_is_new, v6_path = self._has_new('v6', self.ipv6_url) - if any([v4_is_new, v6_is_new]): - self.logger.info('Prefix update required.') - else: - self.logger.debug('No prefix update required.') - return any([v4_is_new, v6_is_new]) - - def _has_new(self, address_family, root_url): - r = requests.get(root_url.format('pfx2as-creation.log')) - last_entry = r.text.split('\n')[-2] - path = last_entry.split('\t')[-1] - if path == self.prefix_cache.get(f'current|{address_family}'): - self.logger.debug(f'Same file already loaded: {path}') - return False, path - return True, path - - def _init_routes(self, address_family, root_url, path) -> bool: - self.logger.debug(f'Loading {path}') - date = parse(re.findall('(?:.*)/(?:.*)/routeviews-rv[2,6]-(.*).pfx2as.gz', path)[0]).isoformat() - r = requests.get(root_url.format(path)) - to_import = defaultdict(lambda: {address_family: set(), 'ipcount': 0}) - with gzip.open(BytesIO(r.content), 'r') as f: - for line in f: - prefix, length, asns = line.decode().strip().split('\t') - # The meaning of AS set and multi-origin AS in unclear. Taking the first ASN in the list only. 
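To make the removed parsing rule concrete: the CAIDA pfx2as dumps this loader consumed are tab-separated prefix/length/origin triples, where the origin field can be an AS set ('_'-joined) or a multi-origin list (','-joined). A small sketch under those assumptions, with an illustrative line (documentation prefix and ASNs):

    import re
    from ipaddress import ip_network

    line = '198.51.100.0\t24\t64496_64511'  # illustrative pfx2as entry
    prefix, length, asns = line.strip().split('\t')
    asn = re.split('[,_]', asns)[0]  # keep the first ASN, as the loader did
    network = ip_network(f'{prefix}/{length}')
    print(asn, network, network.num_addresses)  # 64496 198.51.100.0/24 256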
- asn = re.split('[,_]', asns)[0] - network = ip_network(f'{prefix}/{length}') - to_import[asn][address_family].add(str(network)) - to_import[asn]['ipcount'] += network.num_addresses - - p = self.prefix_cache.pipeline() - p_asn_meta = self.asn_meta.pipeline() - p.sadd('asns', *to_import.keys()) - p_asn_meta.set(f'{address_family}|last', date) # Not necessarely today - p_asn_meta.rpush(f'{address_family}|dates', date) - p_asn_meta.sadd(f'{date}|asns|{address_family}', *to_import.keys()) - for asn, data in to_import.items(): - p.sadd(f'{asn}|{address_family}', *data[address_family]) - p.set(f'{asn}|{address_family}|ipcount', data['ipcount']) - p_asn_meta.sadd(f'{date}|{asn}|{address_family}', *data[address_family]) - p_asn_meta.set(f'{date}|{asn}|{address_family}|ipcount', data['ipcount']) - p.set(f'current|{address_family}', path) - p.execute() - p_asn_meta.execute() - return True - - def load_prefixes(self): - set_running(self.__class__.__name__) - self.prefix_cache.delete('ready') - self.asn_meta.delete('v4|last') - self.asn_meta.delete('v6|last') - self.logger.info('Prefix update starting in a few seconds.') - time.sleep(15) - v4_is_new, v4_path = self._has_new('v4', self.ipv4_url) - v6_is_new, v6_path = self._has_new('v6', self.ipv6_url) - - self.prefix_cache.flushdb() - # TODO: Add a catchall for everything that isn't announced so we can track that down later on - self._init_routes('v6', self.ipv6_url, v6_path) - self._init_routes('v4', self.ipv4_url, v4_path) - self.prefix_cache.set('ready', 1) - self.logger.info('Prefix update complete.') - unset_running(self.__class__.__name__) diff --git a/bgpranking/querying.py b/bgpranking/querying.py index c31bba8..30243d4 100644 --- a/bgpranking/querying.py +++ b/bgpranking/querying.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- from typing import TypeVar, Union @@ -8,9 +8,10 @@ from dateutil.parser import parse from collections import defaultdict import logging +import json from redis import StrictRedis -from .libs.helpers import get_socket_path +from .libs.helpers import get_socket_path, get_config_path from .libs.exceptions import InvalidDateFormat from .libs.statsripe import StatsRIPE @@ -24,7 +25,7 @@ class Querying(): self.storage = StrictRedis(unix_socket_path=get_socket_path('storage'), decode_responses=True) self.ranking = StrictRedis(unix_socket_path=get_socket_path('storage'), db=1, decode_responses=True) self.asn_meta = StrictRedis(unix_socket_path=get_socket_path('storage'), db=2, decode_responses=True) - self.cache = StrictRedis(unix_socket_path=get_socket_path('ris'), db=1, decode_responses=True) + self.cache = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) def __init_logger(self, loglevel: int): self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(loglevel) @@ -200,3 +201,14 @@ rank = 0 to_return[c].insert(0, (d.isoformat(), rank, list(details))) return to_return + + def get_source_config(self): + pass + + def get_sources_configs(self): + config_dir = get_config_path() / 'modules' + loaded = [] + for modulepath in config_dir.glob('*.json'): + with open(modulepath) as f: + loaded.append(json.load(f)) + return {'{}-{}'.format(config['vendor'], config['name']): config for config in loaded}
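A note on get_sources_configs() above: it keys each module configuration file by vendor-name. A minimal sketch of a file it would pick up; only the 'vendor' and 'name' keys are actually read here, the file name and the 'url' key are hypothetical:

    import json
    from pathlib import Path

    config = {'vendor': 'example', 'name': 'blocklist',
              'url': 'http://example.com/feed.txt'}  # hypothetical module config
    config_dir = Path('modules')
    config_dir.mkdir(exist_ok=True)
    with open(config_dir / 'example_blocklist.json', 'w') as f:
        json.dump(config, f)
    # get_sources_configs() would expose this entry as 'example-blocklist'.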
diff --git a/bgpranking/ranking.py b/bgpranking/ranking.py index 2b7e1f5..6e5c5ce 100644 --- a/bgpranking/ranking.py +++ b/bgpranking/ranking.py @@ -23,7 +23,7 @@ class Ranking(): self.logger.setLevel(loglevel) def rank_a_day(self, day: str): - # FIXME: If we want to rank an older date, we need to hav older datasets for the announces + # FIXME: If we want to rank an older date, we need to have older datasets for the announces v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last') asns_aggregation_key_v4 = f'{day}|asns|v4' asns_aggregation_key_v6 = f'{day}|asns|v6' @@ -45,6 +45,10 @@ asn_rank_v4 = 0.0 asn_rank_v6 = 0.0 for prefix in self.storage.smembers(f'{day}|{source}|{asn}'): + if prefix == 'None': + # FIXME: this should not happen + self.logger.warning(f'Broken prefix in "{day}|{source}|{asn}"') + continue ips = set([ip_ts.split('|')[0] for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')]) py_prefix = ip_network(prefix) diff --git a/bgpranking/risfetcher.py b/bgpranking/risfetcher.py deleted file mode 100644 index 1dc1fda..0000000 --- a/bgpranking/risfetcher.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import logging -from redis import StrictRedis - -import time -import pytricia -import ipaddress -from .libs.helpers import shutdown_requested, set_running, unset_running, get_socket_path - - -class RISPrefixLookup(): - - def __init__(self, loglevel: int=logging.DEBUG): - self.__init_logger(loglevel) - self.logger.info('Starting RIS Prefix fetcher') - self.prefix_db = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=0, decode_responses=True) - self.longest_prefix_matching = StrictRedis(unix_socket_path=get_socket_path('ris'), db=0, decode_responses=True) - self.tree_v4 = pytricia.PyTricia() - self.tree_v6 = pytricia.PyTricia(128) - self.force_init = True - self.current_v4 = None - self.current_v6 = None - - def __init_logger(self, loglevel): - self.logger = logging.getLogger(f'{self.__class__.__name__}') - self.logger.setLevel(loglevel) - - def cache_prefix(self, pipe, ip, prefix, asns): - pipe.hmset(ip, {'asn': asns, 'prefix': prefix}) - pipe.expire(ip, 43200) # 12H - - def init_tree(self): - for asn in self.prefix_db.smembers('asns'): - for prefix in self.prefix_db.smembers(f'{asn}|v4'): - self.tree_v4[prefix] = asn - for prefix in self.prefix_db.smembers(f'{asn}|v6'): - self.tree_v6[prefix] = asn - self.tree_v4['0.0.0.0/0'] = 0 - self.tree_v6['::/0'] = 0 - self.current_v4 = self.prefix_db.get('current|v4') - self.current_v6 = self.prefix_db.get('current|v6') - - def run(self): - set_running(self.__class__.__name__) - while True: - if shutdown_requested(): - break - if not self.prefix_db.get('ready'): - self.logger.debug('Prefix database not ready.') - time.sleep(5) - self.force_init = True - continue - if (self.force_init or - (self.current_v4 != self.prefix_db.get('current|v4')) or - (self.current_v6 != self.prefix_db.get('current|v6'))): - self.init_tree() - self.force_init = False - - ips = self.longest_prefix_matching.spop('for_ris_lookup', 100) - if not ips: # TODO: add a check against something to stop the loop - self.logger.debug('Nothing to lookup') - break - pipe = self.longest_prefix_matching.pipeline(transaction=False) - for ip in ips: - if self.longest_prefix_matching.exists(ip): - self.logger.debug(f'Already cached: {ip}') - continue - ip = ipaddress.ip_address(ip) - if ip.version == 4: - prefix = self.tree_v4.get_key(ip) - asns = self.tree_v4.get(ip) - else: - prefix = self.tree_v6.get_key(ip) - asns = self.tree_v6.get(ip) - if not prefix: - self.logger.warning(f'The IP {ip} does not seem to be announced') - continue - self.cache_prefix(pipe, ip, prefix, asns) - pipe.execute() - unset_running(self.__class__.__name__)
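The class deleted above was the in-process longest-prefix matcher that IPASN History now replaces. For context, its core operation was a pytricia lookup; a minimal sketch of that step (sample prefix and ASN are illustrative):

    import ipaddress
    import pytricia

    tree_v4 = pytricia.PyTricia()
    tree_v4['0.0.0.0/0'] = 0              # catch-all, as in init_tree()
    tree_v4['198.51.100.0/24'] = '64496'  # illustrative announcement

    ip = ipaddress.ip_address('198.51.100.7')
    print(tree_v4.get_key(ip))  # 198.51.100.0/24 (longest matching prefix)
    print(tree_v4.get(ip))      # 64496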
diff --git a/bgpranking/sanitizer.py b/bgpranking/sanitizer.py index 948ed2f..3f504c2 100644 --- a/bgpranking/sanitizer.py +++ b/bgpranking/sanitizer.py @@ -1,13 +1,14 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from datetime import timezone from dateutil import parser import logging from redis import StrictRedis -from .libs.helpers import shutdown_requested, set_running, unset_running, get_socket_path - import ipaddress +from .libs.helpers import shutdown_requested, set_running, unset_running, get_socket_path, get_ipasn, sanity_check_ipasn + class Sanitizer(): @@ -15,7 +16,7 @@ self.__init_logger(loglevel) self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True) self.redis_sanitized = StrictRedis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) - self.ris_cache = StrictRedis(unix_socket_path=get_socket_path('ris'), db=0, decode_responses=True) + self.ipasn = get_ipasn() self.logger.debug('Starting import') def __init_logger(self, loglevel): @@ -23,6 +24,13 @@ self.logger.setLevel(loglevel) def sanitize(self): + ready, message = sanity_check_ipasn(self.ipasn) + if not ready: + # Try again later. + self.logger.warning(message) + return + self.logger.debug(message) + set_running(self.__class__.__name__) while True: if shutdown_requested(): @@ -30,12 +38,16 @@ uuids = self.redis_intake.spop('intake', 100) if not uuids: break - for_ris_lookup = [] + for_cache = [] pipeline = self.redis_sanitized.pipeline(transaction=False) for uuid in uuids: data = self.redis_intake.hgetall(uuid) try: ip = ipaddress.ip_address(data['ip']) + if isinstance(ip, ipaddress.IPv6Address): + address_family = 'v6' + else: + address_family = 'v4' except ValueError: self.logger.info(f"Invalid IP address: {data['ip']}") continue @@ -43,15 +55,21 @@ self.logger.info(f"The IP address {data['ip']} is not global") continue - date = parser.parse(data['datetime']).date().isoformat() - # NOTE: to consider: discard data with an old timestamp (define old) + datetime = parser.parse(data['datetime']) + if datetime.tzinfo: + # Normalize to UTC, then strip tzinfo so the datetime is naive. + datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None) + + for_cache.append({'ip': str(ip), 'address_family': address_family, 'source': 'caida', + 'date': datetime.isoformat(), 'precision_delta': {'days': 3}}) # Add to temporary DB for further processing - for_ris_lookup.append(str(ip)) - pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], - 'date': date, 'datetime': data['datetime']}) + pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family, + 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()}) pipeline.sadd('to_insert', uuid) pipeline.execute() self.redis_intake.delete(*uuids) - self.ris_cache.sadd('for_ris_lookup', *for_ris_lookup) +
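The timezone handling added above is easy to get wrong, so here is the same normalization as a standalone sketch (the sample timestamp is illustrative):

    from datetime import timezone
    from dateutil import parser

    # Feeds may deliver TZ-aware timestamps in any zone; the storage keys
    # are naive UTC, so convert to UTC first, then drop tzinfo.
    dt = parser.parse('2018-11-14T17:07:30+01:00')
    if dt.tzinfo:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    print(dt.isoformat())         # 2018-11-14T16:07:30
    print(dt.date().isoformat())  # 2018-11-14

+ # Just cache everything so the lookup scripts can do their thing.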
+ self.ipasn.mass_cache(for_cache) unset_running(self.__class__.__name__) diff --git a/bin/loadprefixes.py b/bin/loadprefixes.py deleted file mode 100755 index c9c3334..0000000 --- a/bin/loadprefixes.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import logging -import requests - -from bgpranking.abstractmanager import AbstractManager -from bgpranking.prefixdb import PrefixDatabase - -logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', - level=logging.INFO, datefmt='%I:%M:%S') - - -class PrefixDBManager(AbstractManager): - - def __init__(self, loglevel: int=logging.DEBUG): - super().__init__(loglevel) - self.prefix_db = PrefixDatabase(loglevel=loglevel) - - def _to_run_forever(self): - try: - if self.prefix_db.update_required(): - self.prefix_db.load_prefixes() - except requests.exceptions.ConnectionError as e: - self.logger.critical(f'Unable to download the prefix database: {e}') - - -if __name__ == '__main__': - p = PrefixDBManager() - p.run(sleep_in_sec=3600) diff --git a/bin/monitor.py b/bin/monitor.py index 72f4ce5..2c996d7 100755 --- a/bin/monitor.py +++ b/bin/monitor.py @@ -14,10 +14,7 @@ class MonitorManager(): self.monitor = Monitor() def get_values(self): - generic = self.monitor.get_values() - prefix_cache = self.monitor.info_prefix_cache() - running = self.monitor.get_runinng() - return generic, prefix_cache, running + return self.monitor.get_values() if __name__ == '__main__': diff --git a/bin/rislookup.py b/bin/rislookup.py deleted file mode 100755 index c7aa968..0000000 --- a/bin/rislookup.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import logging - -from bgpranking.abstractmanager import AbstractManager -from bgpranking.risfetcher import RISPrefixLookup - -logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', - level=logging.INFO, datefmt='%I:%M:%S') - - -class RISLookupManager(AbstractManager): - - def __init__(self, loglevel: int=logging.INFO): - super().__init__(loglevel) - self.ris_fetcher = RISPrefixLookup(loglevel=loglevel) - - def _to_run_forever(self): - self.ris_fetcher.run() - - -if __name__ == '__main__': - rislookup = RISLookupManager() - rislookup.run(120) diff --git a/bin/run_backend.py b/bin/run_backend.py index de08f9f..5e3169c 100755 --- a/bin/run_backend.py +++ b/bin/run_backend.py @@ -12,7 +12,7 @@ import argparse def launch_cache(storage_directory: Path=None): if not storage_directory: storage_directory = get_homedir() - if not check_running('ris') and not check_running('prefixes'): + if not check_running('cache'): Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache')) @@ -55,7 +55,7 @@ def launch_all(): def check_all(stop=False): - backends = [['ris', False], ['prefixes', False], ['storage', False], + backends = [['cache', False], ['storage', False], ['intake', False], ['prepare', False]] while True: for b in backends: diff --git a/bin/shutdown.py b/bin/shutdown.py index c64919f..d596135 100755 --- a/bin/shutdown.py +++ b/bin/shutdown.py @@ -6,7 +6,7 @@ import time from redis import StrictRedis if __name__ == '__main__': - r = StrictRedis(unix_socket_path=get_socket_path('prefixes'), db=1, decode_responses=True) + r = StrictRedis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) r.set('shutdown', 1) while True: running = is_running() diff --git a/bin/start.py b/bin/start.py index b546102..69af911 100755 --- a/bin/start.py +++ b/bin/start.py @@ -10,8 +10,6 @@ if __name__ == '__main__': get_homedir() 
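Since the former ris and prefixes Redis instances collapse into the single cache socket above, a quick sketch of how to confirm the renamed backend is reachable (the same kind of ping check check_running() performs in helpers.py):

    from redis import StrictRedis
    from redis.exceptions import ConnectionError
    from bgpranking.libs.helpers import get_socket_path

    try:
        r = StrictRedis(unix_socket_path=get_socket_path('cache'))
        print('cache backend up:', r.ping())
    except ConnectionError:
        print('cache backend down')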
p = Popen(['run_backend.py', '--start']) p.wait() - Popen(['loadprefixes.py']) - Popen(['rislookup.py']) Popen(['fetcher.py']) Popen(['ssfetcher.py']) Popen(['parser.py']) diff --git a/cache/prefixes.conf b/cache/cache.conf similarity index 99% rename from cache/prefixes.conf rename to cache/cache.conf index b2b7a6c..33a9986 100644 --- a/cache/prefixes.conf +++ b/cache/cache.conf @@ -106,7 +106,7 @@ tcp-backlog 511 # incoming connections. There is no default, so Redis will not listen # on a unix socket when not specified. # -unixsocket prefixes.sock +unixsocket cache.sock unixsocketperm 700 # Close the connection after a client is idle for N seconds (0 to disable) @@ -168,7 +168,7 @@ loglevel notice # Specify the log file name. Also the empty string can be used to force # Redis to log on the standard output. Note that if you use standard # output for logging but daemonize, logs will be sent to /dev/null -logfile "prefixes.log" +logfile "cache.log" # To enable logging to the system logger, just set 'syslog-enabled' to yes, # and optionally update the other syslog parameters to suit your needs. diff --git a/cache/ris.conf b/cache/ris.conf deleted file mode 100644 index 7677d99..0000000 --- a/cache/ris.conf +++ /dev/null @@ -1,1317 +0,0 @@ -# Redis configuration file example. -# -# Note that in order to read the configuration file, Redis must be -# started with the file path as first argument: -# -# ./redis-server /path/to/redis.conf - -# Note on units: when memory size is needed, it is possible to specify -# it in the usual form of 1k 5GB 4M and so forth: -# -# 1k => 1000 bytes -# 1kb => 1024 bytes -# 1m => 1000000 bytes -# 1mb => 1024*1024 bytes -# 1g => 1000000000 bytes -# 1gb => 1024*1024*1024 bytes -# -# units are case insensitive so 1GB 1Gb 1gB are all the same. - -################################## INCLUDES ################################### - -# Include one or more other config files here. This is useful if you -# have a standard template that goes to all Redis servers but also need -# to customize a few per-server settings. Include files can include -# other files, so use this wisely. -# -# Notice option "include" won't be rewritten by command "CONFIG REWRITE" -# from admin or Redis Sentinel. Since Redis always uses the last processed -# line as value of a configuration directive, you'd better put includes -# at the beginning of this file to avoid overwriting config change at runtime. -# -# If instead you are interested in using includes to override configuration -# options, it is better to use include as the last line. -# -# include /path/to/local.conf -# include /path/to/other.conf - -################################## MODULES ##################################### - -# Load modules at startup. If the server is not able to load modules -# it will abort. It is possible to use multiple loadmodule directives. -# -# loadmodule /path/to/my_module.so -# loadmodule /path/to/other_module.so - -################################## NETWORK ##################################### - -# By default, if no "bind" configuration directive is specified, Redis listens -# for connections from all the network interfaces available on the server. -# It is possible to listen to just one or multiple selected interfaces using -# the "bind" configuration directive, followed by one or more IP addresses. 
-# -# Examples: -# -# bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 ::1 -# -# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the -# internet, binding to all the interfaces is dangerous and will expose the -# instance to everybody on the internet. So by default we uncomment the -# following bind directive, that will force Redis to listen only into -# the IPv4 lookback interface address (this means Redis will be able to -# accept connections only from clients running into the same computer it -# is running). -# -# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES -# JUST COMMENT THE FOLLOWING LINE. -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -bind 127.0.0.1 - -# Protected mode is a layer of security protection, in order to avoid that -# Redis instances left open on the internet are accessed and exploited. -# -# When protected mode is on and if: -# -# 1) The server is not binding explicitly to a set of addresses using the -# "bind" directive. -# 2) No password is configured. -# -# The server only accepts connections from clients connecting from the -# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain -# sockets. -# -# By default protected mode is enabled. You should disable it only if -# you are sure you want clients from other hosts to connect to Redis -# even if no authentication is configured, nor a specific set of interfaces -# are explicitly listed using the "bind" directive. -protected-mode yes - -# Accept connections on the specified port, default is 6379 (IANA #815344). -# If port 0 is specified Redis will not listen on a TCP socket. -port 0 - -# TCP listen() backlog. -# -# In high requests-per-second environments you need an high backlog in order -# to avoid slow clients connections issues. Note that the Linux kernel -# will silently truncate it to the value of /proc/sys/net/core/somaxconn so -# make sure to raise both the value of somaxconn and tcp_max_syn_backlog -# in order to get the desired effect. -tcp-backlog 511 - -# Unix socket. -# -# Specify the path for the Unix socket that will be used to listen for -# incoming connections. There is no default, so Redis will not listen -# on a unix socket when not specified. -# -unixsocket ris.sock -unixsocketperm 700 - -# Close the connection after a client is idle for N seconds (0 to disable) -timeout 0 - -# TCP keepalive. -# -# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence -# of communication. This is useful for two reasons: -# -# 1) Detect dead peers. -# 2) Take the connection alive from the point of view of network -# equipment in the middle. -# -# On Linux, the specified value (in seconds) is the period used to send ACKs. -# Note that to close the connection the double of the time is needed. -# On other kernels the period depends on the kernel configuration. -# -# A reasonable value for this option is 300 seconds, which is the new -# Redis default starting with Redis 3.2.1. -tcp-keepalive 300 - -################################# GENERAL ##################################### - -# By default Redis does not run as a daemon. Use 'yes' if you need it. -# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. -daemonize yes - -# If you run Redis from upstart or systemd, Redis can interact with your -# supervision tree. 
Options: -# supervised no - no supervision interaction -# supervised upstart - signal upstart by putting Redis into SIGSTOP mode -# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET -# supervised auto - detect upstart or systemd method based on -# UPSTART_JOB or NOTIFY_SOCKET environment variables -# Note: these supervision methods only signal "process is ready." -# They do not enable continuous liveness pings back to your supervisor. -supervised no - -# If a pid file is specified, Redis writes it where specified at startup -# and removes it at exit. -# -# When the server runs non daemonized, no pid file is created if none is -# specified in the configuration. When the server is daemonized, the pid file -# is used even if not specified, defaulting to "/var/run/redis.pid". -# -# Creating a pid file is best effort: if Redis is not able to create it -# nothing bad happens, the server will start and run normally. -# pidfile /var/run/redis_6379.pid - -# Specify the server verbosity level. -# This can be one of: -# debug (a lot of information, useful for development/testing) -# verbose (many rarely useful info, but not a mess like the debug level) -# notice (moderately verbose, what you want in production probably) -# warning (only very important / critical messages are logged) -loglevel notice - -# Specify the log file name. Also the empty string can be used to force -# Redis to log on the standard output. Note that if you use standard -# output for logging but daemonize, logs will be sent to /dev/null -logfile "ris.log" - -# To enable logging to the system logger, just set 'syslog-enabled' to yes, -# and optionally update the other syslog parameters to suit your needs. -# syslog-enabled no - -# Specify the syslog identity. -# syslog-ident redis - -# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. -# syslog-facility local0 - -# Set the number of databases. The default database is DB 0, you can select -# a different one on a per-connection basis using SELECT where -# dbid is a number between 0 and 'databases'-1 -databases 16 - -# By default Redis shows an ASCII art logo only when started to log to the -# standard output and if the standard output is a TTY. Basically this means -# that normally a logo is displayed only in interactive sessions. -# -# However it is possible to force the pre-4.0 behavior and always show a -# ASCII art logo in startup logs by setting the following option to yes. -always-show-logo yes - -################################ SNAPSHOTTING ################################ -# -# Save the DB on disk: -# -# save -# -# Will save the DB if both the given number of seconds and the given -# number of write operations against the DB occurred. -# -# In the example below the behaviour will be to save: -# after 900 sec (15 min) if at least 1 key changed -# after 300 sec (5 min) if at least 10 keys changed -# after 60 sec if at least 10000 keys changed -# -# Note: you can disable saving completely by commenting out all "save" lines. -# -# It is also possible to remove all the previously configured save -# points by adding a save directive with a single empty string argument -# like in the following example: -# -# save "" - -#save 900 1 -#save 300 10 -save 600 10000 - -# By default Redis will stop accepting writes if RDB snapshots are enabled -# (at least one save point) and the latest background save failed. 
-# This will make the user aware (in a hard way) that data is not persisting -# on disk properly, otherwise chances are that no one will notice and some -# disaster will happen. -# -# If the background saving process will start working again Redis will -# automatically allow writes again. -# -# However if you have setup your proper monitoring of the Redis server -# and persistence, you may want to disable this feature so that Redis will -# continue to work as usual even if there are problems with disk, -# permissions, and so forth. -stop-writes-on-bgsave-error yes - -# Compress string objects using LZF when dump .rdb databases? -# For default that's set to 'yes' as it's almost always a win. -# If you want to save some CPU in the saving child set it to 'no' but -# the dataset will likely be bigger if you have compressible values or keys. -rdbcompression yes - -# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. -# This makes the format more resistant to corruption but there is a performance -# hit to pay (around 10%) when saving and loading RDB files, so you can disable it -# for maximum performances. -# -# RDB files created with checksum disabled have a checksum of zero that will -# tell the loading code to skip the check. -rdbchecksum yes - -# The filename where to dump the DB -dbfilename ris.rdb - -# The working directory. -# -# The DB will be written inside this directory, with the filename specified -# above using the 'dbfilename' configuration directive. -# -# The Append Only File will also be created inside this directory. -# -# Note that you must specify a directory here, not a file name. -dir ./ - -################################# REPLICATION ################################# - -# Master-Slave replication. Use slaveof to make a Redis instance a copy of -# another Redis server. A few things to understand ASAP about Redis replication. -# -# 1) Redis replication is asynchronous, but you can configure a master to -# stop accepting writes if it appears to be not connected with at least -# a given number of slaves. -# 2) Redis slaves are able to perform a partial resynchronization with the -# master if the replication link is lost for a relatively small amount of -# time. You may want to configure the replication backlog size (see the next -# sections of this file) with a sensible value depending on your needs. -# 3) Replication is automatic and does not need user intervention. After a -# network partition slaves automatically try to reconnect to masters -# and resynchronize with them. -# -# slaveof - -# If the master is password protected (using the "requirepass" configuration -# directive below) it is possible to tell the slave to authenticate before -# starting the replication synchronization process, otherwise the master will -# refuse the slave request. -# -# masterauth - -# When a slave loses its connection with the master, or when the replication -# is still in progress, the slave can act in two different ways: -# -# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will -# still reply to client requests, possibly with out of date data, or the -# data set may just be empty if this is the first synchronization. -# -# 2) if slave-serve-stale-data is set to 'no' the slave will reply with -# an error "SYNC with master in progress" to all the kind of commands -# but to INFO and SLAVEOF. -# -slave-serve-stale-data yes - -# You can configure a slave instance to accept writes or not. 
Writing against -# a slave instance may be useful to store some ephemeral data (because data -# written on a slave will be easily deleted after resync with the master) but -# may also cause problems if clients are writing to it because of a -# misconfiguration. -# -# Since Redis 2.6 by default slaves are read-only. -# -# Note: read only slaves are not designed to be exposed to untrusted clients -# on the internet. It's just a protection layer against misuse of the instance. -# Still a read only slave exports by default all the administrative commands -# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve -# security of read only slaves using 'rename-command' to shadow all the -# administrative / dangerous commands. -slave-read-only yes - -# Replication SYNC strategy: disk or socket. -# -# ------------------------------------------------------- -# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY -# ------------------------------------------------------- -# -# New slaves and reconnecting slaves that are not able to continue the replication -# process just receiving differences, need to do what is called a "full -# synchronization". An RDB file is transmitted from the master to the slaves. -# The transmission can happen in two different ways: -# -# 1) Disk-backed: The Redis master creates a new process that writes the RDB -# file on disk. Later the file is transferred by the parent -# process to the slaves incrementally. -# 2) Diskless: The Redis master creates a new process that directly writes the -# RDB file to slave sockets, without touching the disk at all. -# -# With disk-backed replication, while the RDB file is generated, more slaves -# can be queued and served with the RDB file as soon as the current child producing -# the RDB file finishes its work. With diskless replication instead once -# the transfer starts, new slaves arriving will be queued and a new transfer -# will start when the current one terminates. -# -# When diskless replication is used, the master waits a configurable amount of -# time (in seconds) before starting the transfer in the hope that multiple slaves -# will arrive and the transfer can be parallelized. -# -# With slow disks and fast (large bandwidth) networks, diskless replication -# works better. -repl-diskless-sync no - -# When diskless replication is enabled, it is possible to configure the delay -# the server waits in order to spawn the child that transfers the RDB via socket -# to the slaves. -# -# This is important since once the transfer starts, it is not possible to serve -# new slaves arriving, that will be queued for the next RDB transfer, so the server -# waits a delay in order to let more slaves arrive. -# -# The delay is specified in seconds, and by default is 5 seconds. To disable -# it entirely just set it to 0 seconds and the transfer will start ASAP. -repl-diskless-sync-delay 5 - -# Slaves send PINGs to server in a predefined interval. It's possible to change -# this interval with the repl_ping_slave_period option. The default value is 10 -# seconds. -# -# repl-ping-slave-period 10 - -# The following option sets the replication timeout for: -# -# 1) Bulk transfer I/O during SYNC, from the point of view of slave. -# 2) Master timeout from the point of view of slaves (data, pings). -# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). 
-# -# It is important to make sure that this value is greater than the value -# specified for repl-ping-slave-period otherwise a timeout will be detected -# every time there is low traffic between the master and the slave. -# -# repl-timeout 60 - -# Disable TCP_NODELAY on the slave socket after SYNC? -# -# If you select "yes" Redis will use a smaller number of TCP packets and -# less bandwidth to send data to slaves. But this can add a delay for -# the data to appear on the slave side, up to 40 milliseconds with -# Linux kernels using a default configuration. -# -# If you select "no" the delay for data to appear on the slave side will -# be reduced but more bandwidth will be used for replication. -# -# By default we optimize for low latency, but in very high traffic conditions -# or when the master and slaves are many hops away, turning this to "yes" may -# be a good idea. -repl-disable-tcp-nodelay no - -# Set the replication backlog size. The backlog is a buffer that accumulates -# slave data when slaves are disconnected for some time, so that when a slave -# wants to reconnect again, often a full resync is not needed, but a partial -# resync is enough, just passing the portion of data the slave missed while -# disconnected. -# -# The bigger the replication backlog, the longer the time the slave can be -# disconnected and later be able to perform a partial resynchronization. -# -# The backlog is only allocated once there is at least a slave connected. -# -# repl-backlog-size 1mb - -# After a master has no longer connected slaves for some time, the backlog -# will be freed. The following option configures the amount of seconds that -# need to elapse, starting from the time the last slave disconnected, for -# the backlog buffer to be freed. -# -# Note that slaves never free the backlog for timeout, since they may be -# promoted to masters later, and should be able to correctly "partially -# resynchronize" with the slaves: hence they should always accumulate backlog. -# -# A value of 0 means to never release the backlog. -# -# repl-backlog-ttl 3600 - -# The slave priority is an integer number published by Redis in the INFO output. -# It is used by Redis Sentinel in order to select a slave to promote into a -# master if the master is no longer working correctly. -# -# A slave with a low priority number is considered better for promotion, so -# for instance if there are three slaves with priority 10, 100, 25 Sentinel will -# pick the one with priority 10, that is the lowest. -# -# However a special priority of 0 marks the slave as not able to perform the -# role of master, so a slave with priority of 0 will never be selected by -# Redis Sentinel for promotion. -# -# By default the priority is 100. -slave-priority 100 - -# It is possible for a master to stop accepting writes if there are less than -# N slaves connected, having a lag less or equal than M seconds. -# -# The N slaves need to be in "online" state. -# -# The lag in seconds, that must be <= the specified value, is calculated from -# the last ping received from the slave, that is usually sent every second. -# -# This option does not GUARANTEE that N replicas will accept the write, but -# will limit the window of exposure for lost writes in case not enough slaves -# are available, to the specified number of seconds. -# -# For example to require at least 3 slaves with a lag <= 10 seconds use: -# -# min-slaves-to-write 3 -# min-slaves-max-lag 10 -# -# Setting one or the other to 0 disables the feature. 
-# -# By default min-slaves-to-write is set to 0 (feature disabled) and -# min-slaves-max-lag is set to 10. - -# A Redis master is able to list the address and port of the attached -# slaves in different ways. For example the "INFO replication" section -# offers this information, which is used, among other tools, by -# Redis Sentinel in order to discover slave instances. -# Another place where this info is available is in the output of the -# "ROLE" command of a master. -# -# The listed IP and address normally reported by a slave is obtained -# in the following way: -# -# IP: The address is auto detected by checking the peer address -# of the socket used by the slave to connect with the master. -# -# Port: The port is communicated by the slave during the replication -# handshake, and is normally the port that the slave is using to -# list for connections. -# -# However when port forwarding or Network Address Translation (NAT) is -# used, the slave may be actually reachable via different IP and port -# pairs. The following two options can be used by a slave in order to -# report to its master a specific set of IP and port, so that both INFO -# and ROLE will report those values. -# -# There is no need to use both the options if you need to override just -# the port or the IP address. -# -# slave-announce-ip 5.5.5.5 -# slave-announce-port 1234 - -################################## SECURITY ################################### - -# Require clients to issue AUTH before processing any other -# commands. This might be useful in environments in which you do not trust -# others with access to the host running redis-server. -# -# This should stay commented out for backward compatibility and because most -# people do not need auth (e.g. they run their own servers). -# -# Warning: since Redis is pretty fast an outside user can try up to -# 150k passwords per second against a good box. This means that you should -# use a very strong password otherwise it will be very easy to break. -# -# requirepass foobared - -# Command renaming. -# -# It is possible to change the name of dangerous commands in a shared -# environment. For instance the CONFIG command may be renamed into something -# hard to guess so that it will still be available for internal-use tools -# but not available for general clients. -# -# Example: -# -# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 -# -# It is also possible to completely kill a command by renaming it into -# an empty string: -# -# rename-command CONFIG "" -# -# Please note that changing the name of commands that are logged into the -# AOF file or transmitted to slaves may cause problems. - -################################### CLIENTS #################################### - -# Set the max number of connected clients at the same time. By default -# this limit is set to 10000 clients, however if the Redis server is not -# able to configure the process file limit to allow for the specified limit -# the max number of allowed clients is set to the current file limit -# minus 32 (as Redis reserves a few file descriptors for internal uses). -# -# Once the limit is reached Redis will close all the new connections sending -# an error 'max number of clients reached'. -# -# maxclients 10000 - -############################## MEMORY MANAGEMENT ################################ - -# Set a memory usage limit to the specified amount of bytes. -# When the memory limit is reached Redis will try to remove keys -# according to the eviction policy selected (see maxmemory-policy). 
-# -# If Redis can't remove keys according to the policy, or if the policy is -# set to 'noeviction', Redis will start to reply with errors to commands -# that would use more memory, like SET, LPUSH, and so on, and will continue -# to reply to read-only commands like GET. -# -# This option is usually useful when using Redis as an LRU or LFU cache, or to -# set a hard memory limit for an instance (using the 'noeviction' policy). -# -# WARNING: If you have slaves attached to an instance with maxmemory on, -# the size of the output buffers needed to feed the slaves are subtracted -# from the used memory count, so that network problems / resyncs will -# not trigger a loop where keys are evicted, and in turn the output -# buffer of slaves is full with DELs of keys evicted triggering the deletion -# of more keys, and so forth until the database is completely emptied. -# -# In short... if you have slaves attached it is suggested that you set a lower -# limit for maxmemory so that there is some free RAM on the system for slave -# output buffers (but this is not needed if the policy is 'noeviction'). -# -# maxmemory - -# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory -# is reached. You can select among five behaviors: -# -# volatile-lru -> Evict using approximated LRU among the keys with an expire set. -# allkeys-lru -> Evict any key using approximated LRU. -# volatile-lfu -> Evict using approximated LFU among the keys with an expire set. -# allkeys-lfu -> Evict any key using approximated LFU. -# volatile-random -> Remove a random key among the ones with an expire set. -# allkeys-random -> Remove a random key, any key. -# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) -# noeviction -> Don't evict anything, just return an error on write operations. -# -# LRU means Least Recently Used -# LFU means Least Frequently Used -# -# Both LRU, LFU and volatile-ttl are implemented using approximated -# randomized algorithms. -# -# Note: with any of the above policies, Redis will return an error on write -# operations, when there are no suitable keys for eviction. -# -# At the date of writing these commands are: set setnx setex append -# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd -# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby -# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby -# getset mset msetnx exec sort -# -# The default is: -# -# maxmemory-policy noeviction - -# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated -# algorithms (in order to save memory), so you can tune it for speed or -# accuracy. For default Redis will check five keys and pick the one that was -# used less recently, you can change the sample size using the following -# configuration directive. -# -# The default of 5 produces good enough results. 10 Approximates very closely -# true LRU but costs more CPU. 3 is faster but not very accurate. -# -# maxmemory-samples 5 - -############################# LAZY FREEING #################################### - -# Redis has two primitives to delete keys. One is called DEL and is a blocking -# deletion of the object. It means that the server stops processing new commands -# in order to reclaim all the memory associated with an object in a synchronous -# way. If the key deleted is associated with a small object, the time needed -# in order to execute the DEL command is very small and comparable to most other -# O(1) or O(log_N) commands in Redis. 
However if the key is associated with an -# aggregated value containing millions of elements, the server can block for -# a long time (even seconds) in order to complete the operation. -# -# For the above reasons Redis also offers non blocking deletion primitives -# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and -# FLUSHDB commands, in order to reclaim memory in background. Those commands -# are executed in constant time. Another thread will incrementally free the -# object in the background as fast as possible. -# -# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. -# It's up to the design of the application to understand when it is a good -# idea to use one or the other. However the Redis server sometimes has to -# delete keys or flush the whole database as a side effect of other operations. -# Specifically Redis deletes objects independently of a user call in the -# following scenarios: -# -# 1) On eviction, because of the maxmemory and maxmemory policy configurations, -# in order to make room for new data, without going over the specified -# memory limit. -# 2) Because of expire: when a key with an associated time to live (see the -# EXPIRE command) must be deleted from memory. -# 3) Because of a side effect of a command that stores data on a key that may -# already exist. For example the RENAME command may delete the old key -# content when it is replaced with another one. Similarly SUNIONSTORE -# or SORT with STORE option may delete existing keys. The SET command -# itself removes any old content of the specified key in order to replace -# it with the specified string. -# 4) During replication, when a slave performs a full resynchronization with -# its master, the content of the whole database is removed in order to -# load the RDB file just transfered. -# -# In all the above cases the default is to delete objects in a blocking way, -# like if DEL was called. However you can configure each case specifically -# in order to instead release memory in a non-blocking way like if UNLINK -# was called, using the following configuration directives: - -lazyfree-lazy-eviction no -lazyfree-lazy-expire no -lazyfree-lazy-server-del no -slave-lazy-flush no - -############################## APPEND ONLY MODE ############################### - -# By default Redis asynchronously dumps the dataset on disk. This mode is -# good enough in many applications, but an issue with the Redis process or -# a power outage may result into a few minutes of writes lost (depending on -# the configured save points). -# -# The Append Only File is an alternative persistence mode that provides -# much better durability. For instance using the default data fsync policy -# (see later in the config file) Redis can lose just one second of writes in a -# dramatic event like a server power outage, or a single write if something -# wrong with the Redis process itself happens, but the operating system is -# still running correctly. -# -# AOF and RDB persistence can be enabled at the same time without problems. -# If the AOF is enabled on startup Redis will load the AOF, that is the file -# with the better durability guarantees. -# -# Please check http://redis.io/topics/persistence for more information. - -appendonly no - -# The name of the append only file (default: "appendonly.aof") - -appendfilename "appendonly.aof" - -# The fsync() call tells the Operating System to actually write data on disk -# instead of waiting for more data in the output buffer. 
Some OS will really flush -# data on disk, some other OS will just try to do it ASAP. -# -# Redis supports three different modes: -# -# no: don't fsync, just let the OS flush the data when it wants. Faster. -# always: fsync after every write to the append only log. Slow, Safest. -# everysec: fsync only one time every second. Compromise. -# -# The default is "everysec", as that's usually the right compromise between -# speed and data safety. It's up to you to understand if you can relax this to -# "no" that will let the operating system flush the output buffer when -# it wants, for better performances (but if you can live with the idea of -# some data loss consider the default persistence mode that's snapshotting), -# or on the contrary, use "always" that's very slow but a bit safer than -# everysec. -# -# More details please check the following article: -# http://antirez.com/post/redis-persistence-demystified.html -# -# If unsure, use "everysec". - -# appendfsync always -appendfsync everysec -# appendfsync no - -# When the AOF fsync policy is set to always or everysec, and a background -# saving process (a background save or AOF log background rewriting) is -# performing a lot of I/O against the disk, in some Linux configurations -# Redis may block too long on the fsync() call. Note that there is no fix for -# this currently, as even performing fsync in a different thread will block -# our synchronous write(2) call. -# -# In order to mitigate this problem it's possible to use the following option -# that will prevent fsync() from being called in the main process while a -# BGSAVE or BGREWRITEAOF is in progress. -# -# This means that while another child is saving, the durability of Redis is -# the same as "appendfsync none". In practical terms, this means that it is -# possible to lose up to 30 seconds of log in the worst scenario (with the -# default Linux settings). -# -# If you have latency problems turn this to "yes". Otherwise leave it as -# "no" that is the safest pick from the point of view of durability. - -no-appendfsync-on-rewrite no - -# Automatic rewrite of the append only file. -# Redis is able to automatically rewrite the log file implicitly calling -# BGREWRITEAOF when the AOF log size grows by the specified percentage. -# -# This is how it works: Redis remembers the size of the AOF file after the -# latest rewrite (if no rewrite has happened since the restart, the size of -# the AOF at startup is used). -# -# This base size is compared to the current size. If the current size is -# bigger than the specified percentage, the rewrite is triggered. Also -# you need to specify a minimal size for the AOF file to be rewritten, this -# is useful to avoid rewriting the AOF file even if the percentage increase -# is reached but it is still pretty small. -# -# Specify a percentage of zero in order to disable the automatic AOF -# rewrite feature. - -auto-aof-rewrite-percentage 100 -auto-aof-rewrite-min-size 64mb - -# An AOF file may be found to be truncated at the end during the Redis -# startup process, when the AOF data gets loaded back into memory. -# This may happen when the system where Redis is running -# crashes, especially when an ext4 filesystem is mounted without the -# data=ordered option (however this can't happen when Redis itself -# crashes or aborts but the operating system still works correctly). 
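As an aside on the appendfsync policies above: the directive can also be changed at runtime. A minimal redis-py sketch, assuming a running instance reachable over the ./cache.sock socket that the cache scripts in this patch use:

from redis import StrictRedis

r = StrictRedis(unix_socket_path='./cache.sock', decode_responses=True)
r.config_set('appendfsync', 'everysec')  # the documented speed/safety compromise
print(r.config_get('appendfsync'))       # -> {'appendfsync': 'everysec'}
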
-# -# Redis can either exit with an error when this happens, or load as much -# data as possible (the default now) and start if the AOF file is found -# to be truncated at the end. The following option controls this behavior. -# -# If aof-load-truncated is set to yes, a truncated AOF file is loaded and -# the Redis server starts emitting a log to inform the user of the event. -# Otherwise if the option is set to no, the server aborts with an error -# and refuses to start. When the option is set to no, the user requires -# to fix the AOF file using the "redis-check-aof" utility before to restart -# the server. -# -# Note that if the AOF file will be found to be corrupted in the middle -# the server will still exit with an error. This option only applies when -# Redis will try to read more data from the AOF file but not enough bytes -# will be found. -aof-load-truncated yes - -# When rewriting the AOF file, Redis is able to use an RDB preamble in the -# AOF file for faster rewrites and recoveries. When this option is turned -# on the rewritten AOF file is composed of two different stanzas: -# -# [RDB file][AOF tail] -# -# When loading Redis recognizes that the AOF file starts with the "REDIS" -# string and loads the prefixed RDB file, and continues loading the AOF -# tail. -# -# This is currently turned off by default in order to avoid the surprise -# of a format change, but will at some point be used as the default. -aof-use-rdb-preamble no - -################################ LUA SCRIPTING ############################### - -# Max execution time of a Lua script in milliseconds. -# -# If the maximum execution time is reached Redis will log that a script is -# still in execution after the maximum allowed time and will start to -# reply to queries with an error. -# -# When a long running script exceeds the maximum execution time only the -# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be -# used to stop a script that did not yet called write commands. The second -# is the only way to shut down the server in the case a write command was -# already issued by the script but the user doesn't want to wait for the natural -# termination of the script. -# -# Set it to 0 or a negative value for unlimited execution without warnings. -lua-time-limit 5000 - -################################ REDIS CLUSTER ############################### -# -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however -# in order to mark it as "mature" we need to wait for a non trivial percentage -# of users to deploy it in production. -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# -# Normal Redis instances can't be part of a Redis Cluster; only nodes that are -# started as cluster nodes can. In order to start a Redis instance as a -# cluster node enable the cluster support uncommenting the following: -# -# cluster-enabled yes - -# Every cluster node has a cluster configuration file. This file is not -# intended to be edited by hand. It is created and updated by Redis nodes. -# Every Redis Cluster node requires a different cluster configuration file. -# Make sure that instances running in the same system do not have -# overlapping cluster configuration file names. -# -# cluster-config-file nodes-6379.conf - -# Cluster node timeout is the amount of milliseconds a node must be unreachable -# for it to be considered in failure state. 
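Stepping back briefly to the Lua scripting limit above: a short script like the one in this hedged redis-py sketch returns long before lua-time-limit, while a runaway script would eventually make the server answer with BUSY errors until SCRIPT KILL is issued (socket path assumed as elsewhere in this patch):

from redis import StrictRedis

r = StrictRedis(unix_socket_path='./cache.sock', decode_responses=True)
incr_by = r.register_script("return redis.call('INCRBY', KEYS[1], ARGV[1])")
print(incr_by(keys=['counter'], args=[10]))  # completes in microseconds
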
-# Most other internal time limits are multiple of the node timeout. -# -# cluster-node-timeout 15000 - -# A slave of a failing master will avoid to start a failover if its data -# looks too old. -# -# There is no simple way for a slave to actually have an exact measure of -# its "data age", so the following two checks are performed: -# -# 1) If there are multiple slaves able to failover, they exchange messages -# in order to try to give an advantage to the slave with the best -# replication offset (more data from the master processed). -# Slaves will try to get their rank by offset, and apply to the start -# of the failover a delay proportional to their rank. -# -# 2) Every single slave computes the time of the last interaction with -# its master. This can be the last ping or command received (if the master -# is still in the "connected" state), or the time that elapsed since the -# disconnection with the master (if the replication link is currently down). -# If the last interaction is too old, the slave will not try to failover -# at all. -# -# The point "2" can be tuned by user. Specifically a slave will not perform -# the failover if, since the last interaction with the master, the time -# elapsed is greater than: -# -# (node-timeout * slave-validity-factor) + repl-ping-slave-period -# -# So for example if node-timeout is 30 seconds, and the slave-validity-factor -# is 10, and assuming a default repl-ping-slave-period of 10 seconds, the -# slave will not try to failover if it was not able to talk with the master -# for longer than 310 seconds. -# -# A large slave-validity-factor may allow slaves with too old data to failover -# a master, while a too small value may prevent the cluster from being able to -# elect a slave at all. -# -# For maximum availability, it is possible to set the slave-validity-factor -# to a value of 0, which means, that slaves will always try to failover the -# master regardless of the last time they interacted with the master. -# (However they'll always try to apply a delay proportional to their -# offset rank). -# -# Zero is the only value able to guarantee that when all the partitions heal -# the cluster will always be able to continue. -# -# cluster-slave-validity-factor 10 - -# Cluster slaves are able to migrate to orphaned masters, that are masters -# that are left without working slaves. This improves the cluster ability -# to resist to failures as otherwise an orphaned master can't be failed over -# in case of failure if it has no working slaves. -# -# Slaves migrate to orphaned masters only if there are still at least a -# given number of other working slaves for their old master. This number -# is the "migration barrier". A migration barrier of 1 means that a slave -# will migrate only if there is at least 1 other working slave for its master -# and so forth. It usually reflects the number of slaves you want for every -# master in your cluster. -# -# Default is 1 (slaves migrate only if their masters remain with at least -# one slave). To disable migration just set it to a very large value. -# A value of 0 can be set but is useful only for debugging and dangerous -# in production. -# -# cluster-migration-barrier 1 - -# By default Redis Cluster nodes stop accepting queries if they detect there -# is at least an hash slot uncovered (no available node is serving it). -# This way if the cluster is partially down (for example a range of hash slots -# are no longer covered) all the cluster becomes, eventually, unavailable. 
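The worked failover example above is easy to sanity-check; a few lines of Python reproduce the 310-second bound:

node_timeout = 30            # seconds
slave_validity_factor = 10
repl_ping_slave_period = 10  # seconds

max_silence = node_timeout * slave_validity_factor + repl_ping_slave_period
print(max_silence)           # 310: a slave silent for longer will not fail over
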
-# It automatically returns available as soon as all the slots are covered again. -# -# However sometimes you want the subset of the cluster which is working, -# to continue to accept queries for the part of the key space that is still -# covered. In order to do so, just set the cluster-require-full-coverage -# option to no. -# -# cluster-require-full-coverage yes - -# This option, when set to yes, prevents slaves from trying to failover its -# master during master failures. However the master can still perform a -# manual failover, if forced to do so. -# -# This is useful in different scenarios, especially in the case of multiple -# data center operations, where we want one side to never be promoted if not -# in the case of a total DC failure. -# -# cluster-slave-no-failover no - -# In order to setup your cluster make sure to read the documentation -# available at http://redis.io web site. - -########################## CLUSTER DOCKER/NAT support ######################## - -# In certain deployments, Redis Cluster nodes address discovery fails, because -# addresses are NAT-ted or because ports are forwarded (the typical case is -# Docker and other containers). -# -# In order to make Redis Cluster working in such environments, a static -# configuration where each node knows its public address is needed. The -# following two options are used for this scope, and are: -# -# * cluster-announce-ip -# * cluster-announce-port -# * cluster-announce-bus-port -# -# Each instruct the node about its address, client port, and cluster message -# bus port. The information is then published in the header of the bus packets -# so that other nodes will be able to correctly map the address of the node -# publishing the information. -# -# If the above options are not used, the normal Redis Cluster auto-detection -# will be used instead. -# -# Note that when remapped, the bus port may not be at the fixed offset of -# clients port + 10000, so you can specify any port and bus-port depending -# on how they get remapped. If the bus-port is not set, a fixed offset of -# 10000 will be used as usually. -# -# Example: -# -# cluster-announce-ip 10.1.1.5 -# cluster-announce-port 6379 -# cluster-announce-bus-port 6380 - -################################## SLOW LOG ################################### - -# The Redis Slow Log is a system to log queries that exceeded a specified -# execution time. The execution time does not include the I/O operations -# like talking with the client, sending the reply and so forth, -# but just the time needed to actually execute the command (this is the only -# stage of command execution where the thread is blocked and can not serve -# other requests in the meantime). -# -# You can configure the slow log with two parameters: one tells Redis -# what is the execution time, in microseconds, to exceed in order for the -# command to get logged, and the other parameter is the length of the -# slow log. When a new command is logged the oldest one is removed from the -# queue of logged commands. - -# The following time is expressed in microseconds, so 1000000 is equivalent -# to one second. Note that a negative number disables the slow log, while -# a value of zero forces the logging of every command. -slowlog-log-slower-than 10000 - -# There is no limit to this length. Just be aware that it will consume memory. -# You can reclaim memory used by the slow log with SLOWLOG RESET. 
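Before the slow log directives below, a sketch of how the slow log is typically consumed from redis-py (a running instance on the ./cache.sock socket is assumed):

from redis import StrictRedis

r = StrictRedis(unix_socket_path='./cache.sock', decode_responses=True)
r.config_set('slowlog-log-slower-than', 10000)  # microseconds
for entry in r.slowlog_get(10):                 # ten most recent slow commands
    print(entry['id'], entry['duration'], entry['command'])
r.slowlog_reset()                               # reclaims the memory noted above
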
-slowlog-max-len 128
-
-################################ LATENCY MONITOR ##############################
-
-# The Redis latency monitoring subsystem samples different operations
-# at runtime in order to collect data related to possible sources of
-# latency of a Redis instance.
-#
-# Via the LATENCY command this information is available to the user that can
-# print graphs and obtain reports.
-#
-# The system only logs operations that were performed in a time equal or
-# greater than the amount of milliseconds specified via the
-# latency-monitor-threshold configuration directive. When its value is set
-# to zero, the latency monitor is turned off.
-#
-# By default latency monitoring is disabled since it is mostly not needed
-# if you don't have latency issues, and collecting data has a performance
-# impact, that while very small, can be measured under big load. Latency
-# monitoring can easily be enabled at runtime using the command
-# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed.
-latency-monitor-threshold 0
-
-############################# EVENT NOTIFICATION ##############################
-
-# Redis can notify Pub/Sub clients about events happening in the key space.
-# This feature is documented at http://redis.io/topics/notifications
-#
-# For instance if keyspace events notification is enabled, and a client
-# performs a DEL operation on key "foo" stored in the Database 0, two
-# messages will be published via Pub/Sub:
-#
-# PUBLISH __keyspace@0__:foo del
-# PUBLISH __keyevent@0__:del foo
-#
-# It is possible to select the events that Redis will notify among a set
-# of classes. Every class is identified by a single character:
-#
-#  K     Keyspace events, published with __keyspace@<db>__ prefix.
-#  E     Keyevent events, published with __keyevent@<db>__ prefix.
-#  g     Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ...
-#  $     String commands
-#  l     List commands
-#  s     Set commands
-#  h     Hash commands
-#  z     Sorted set commands
-#  x     Expired events (events generated every time a key expires)
-#  e     Evicted events (events generated when a key is evicted for maxmemory)
-#  A     Alias for g$lshzxe, so that the "AKE" string means all the events.
-#
-# The "notify-keyspace-events" takes as argument a string that is composed
-# of zero or multiple characters. The empty string means that notifications
-# are disabled.
-#
-# Example: to enable list and generic events, from the point of view of the
-# event name, use:
-#
-#  notify-keyspace-events Elg
-#
-# Example 2: to get the stream of the expired keys subscribing to channel
-# name __keyevent@0__:expired use:
-#
-#  notify-keyspace-events Ex
-#
-# By default all notifications are disabled because most users don't need
-# this feature and the feature has some overhead. Note that if you don't
-# specify at least one of K or E, no events will be delivered.
-notify-keyspace-events ""
-
-############################### ADVANCED CONFIG ###############################
-
-# Hashes are encoded using a memory efficient data structure when they have a
-# small number of entries, and the biggest entry does not exceed a given
-# threshold. These thresholds can be configured using the following directives.
-hash-max-ziplist-entries 512
-hash-max-ziplist-value 64
-
-# Lists are also encoded in a special way to save a lot of space.
-# The number of entries allowed per internal list node can be specified
-# as a fixed maximum size or a maximum number of elements.
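Stepping back to the keyspace-notification flags above, a hedged redis-py pub/sub illustration of the 'Ex' example (delivery depends on the server actually expiring the key, so timing is approximate):

from redis import StrictRedis

r = StrictRedis(unix_socket_path='./cache.sock', decode_responses=True)
r.config_set('notify-keyspace-events', 'Ex')
p = r.pubsub(ignore_subscribe_messages=True)
p.subscribe('__keyevent@0__:expired')
r.set('ephemeral', 'x', ex=1)              # key lives for one second
for message in p.listen():                 # blocks until the event arrives
    print('expired key:', message['data'])  # -> expired key: ephemeral
    break
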
-# For a fixed maximum size, use -5 through -1, meaning: -# -5: max size: 64 Kb <-- not recommended for normal workloads -# -4: max size: 32 Kb <-- not recommended -# -3: max size: 16 Kb <-- probably not recommended -# -2: max size: 8 Kb <-- good -# -1: max size: 4 Kb <-- good -# Positive numbers mean store up to _exactly_ that number of elements -# per list node. -# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), -# but if your use case is unique, adjust the settings as necessary. -list-max-ziplist-size -2 - -# Lists may also be compressed. -# Compress depth is the number of quicklist ziplist nodes from *each* side of -# the list to *exclude* from compression. The head and tail of the list -# are always uncompressed for fast push/pop operations. Settings are: -# 0: disable all list compression -# 1: depth 1 means "don't start compressing until after 1 node into the list, -# going from either the head or tail" -# So: [head]->node->node->...->node->[tail] -# [head], [tail] will always be uncompressed; inner nodes will compress. -# 2: [head]->[next]->node->node->...->node->[prev]->[tail] -# 2 here means: don't compress head or head->next or tail->prev or tail, -# but compress all nodes between them. -# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] -# etc. -list-compress-depth 0 - -# Sets have a special encoding in just one case: when a set is composed -# of just strings that happen to be integers in radix 10 in the range -# of 64 bit signed integers. -# The following configuration setting sets the limit in the size of the -# set in order to use this special memory saving encoding. -set-max-intset-entries 512 - -# Similarly to hashes and lists, sorted sets are also specially encoded in -# order to save a lot of space. This encoding is only used when the length and -# elements of a sorted set are below the following limits: -zset-max-ziplist-entries 128 -zset-max-ziplist-value 64 - -# HyperLogLog sparse representation bytes limit. The limit includes the -# 16 bytes header. When an HyperLogLog using the sparse representation crosses -# this limit, it is converted into the dense representation. -# -# A value greater than 16000 is totally useless, since at that point the -# dense representation is more memory efficient. -# -# The suggested value is ~ 3000 in order to have the benefits of -# the space efficient encoding without slowing down too much PFADD, -# which is O(N) with the sparse encoding. The value can be raised to -# ~ 10000 when CPU is not a concern, but space is, and the data set is -# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. -hll-sparse-max-bytes 3000 - -# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in -# order to help rehashing the main Redis hash table (the one mapping top-level -# keys to values). The hash table implementation Redis uses (see dict.c) -# performs a lazy rehashing: the more operation you run into a hash table -# that is rehashing, the more rehashing "steps" are performed, so if the -# server is idle the rehashing is never complete and some more memory is used -# by the hash table. -# -# The default is to use this millisecond 10 times every second in order to -# actively rehash the main dictionaries, freeing memory when possible. -# -# If unsure: -# use "activerehashing no" if you have hard latency requirements and it is -# not a good thing in your environment that Redis can reply from time to time -# to queries with 2 milliseconds delay. 
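Before the rest of the rehashing advice, a quick way to observe the space-saving encodings this section describes, via OBJECT ENCODING in redis-py (default thresholds like the ones above are assumed):

from redis import StrictRedis

r = StrictRedis(unix_socket_path='./cache.sock', decode_responses=True)
r.delete('h', 's')
r.hset('h', 'field', 'short')
print(r.object('encoding', 'h'))   # -> ziplist, while under the limits above
r.sadd('s', 1, 2, 3)
print(r.object('encoding', 's'))   # -> intset, for small all-integer sets
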
-#
-# use "activerehashing yes" if you don't have such hard requirements but
-# want to free memory asap when possible.
-activerehashing yes
-
-# The client output buffer limits can be used to force disconnection of clients
-# that are not reading data from the server fast enough for some reason (a
-# common reason is that a Pub/Sub client can't consume messages as fast as the
-# publisher can produce them).
-#
-# The limit can be set differently for the three different classes of clients:
-#
-# normal -> normal clients including MONITOR clients
-# slave  -> slave clients
-# pubsub -> clients subscribed to at least one pubsub channel or pattern
-#
-# The syntax of every client-output-buffer-limit directive is the following:
-#
-# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds>
-#
-# A client is immediately disconnected once the hard limit is reached, or if
-# the soft limit is reached and remains reached for the specified number of
-# seconds (continuously).
-# So for instance if the hard limit is 32 megabytes and the soft limit is
-# 16 megabytes / 10 seconds, the client will get disconnected immediately
-# if the size of the output buffers reach 32 megabytes, but will also get
-# disconnected if the client reaches 16 megabytes and continuously overcomes
-# the limit for 10 seconds.
-#
-# By default normal clients are not limited because they don't receive data
-# without asking (in a push way), but just after a request, so only
-# asynchronous clients may create a scenario where data is requested faster
-# than it can read.
-#
-# Instead there is a default limit for pubsub and slave clients, since
-# subscribers and slaves receive data in a push fashion.
-#
-# Both the hard or the soft limit can be disabled by setting them to zero.
-client-output-buffer-limit normal 0 0 0
-client-output-buffer-limit slave 256mb 64mb 60
-client-output-buffer-limit pubsub 32mb 8mb 60
-
-# Client query buffers accumulate new commands. They are limited to a fixed
-# amount by default in order to avoid that a protocol desynchronization (for
-# instance due to a bug in the client) will lead to unbound memory usage in
-# the query buffer. However you can configure it here if you have very special
-# needs, such us huge multi/exec requests or alike.
-#
-# client-query-buffer-limit 1gb
-
-# In the Redis protocol, bulk requests, that are, elements representing single
-# strings, are normally limited ot 512 mb. However you can change this limit
-# here.
-#
-# proto-max-bulk-len 512mb
-
-# Redis calls an internal function to perform many background tasks, like
-# closing connections of clients in timeout, purging expired keys that are
-# never requested, and so forth.
-#
-# Not all tasks are performed with the same frequency, but Redis checks for
-# tasks to perform according to the specified "hz" value.
-#
-# By default "hz" is set to 10. Raising the value will use more CPU when
-# Redis is idle, but at the same time will make Redis more responsive when
-# there are many keys expiring at the same time, and timeouts may be
-# handled with more precision.
-#
-# The range is between 1 and 500, however a value over 100 is usually not
-# a good idea. Most users should use the default of 10 and raise this up to
-# 100 only in environments where very low latency is required.
-hz 10
-
-# When a child rewrites the AOF file, if the following option is enabled
-# the file will be fsync-ed every 32 MB of data generated. This is useful
-# in order to commit the file to the disk more incrementally and avoid
-# big latency spikes.
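The comment above describes the directive that follows; the same switch can also be flipped at runtime. A hedged redis-py sketch, with the usual assumption of a running instance on ./cache.sock:

from redis import StrictRedis

r = StrictRedis(unix_socket_path='./cache.sock', decode_responses=True)
r.config_set('aof-rewrite-incremental-fsync', 'yes')  # fsync every 32 MB rewritten
print(r.config_get('aof-rewrite-incremental-fsync'))
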
-aof-rewrite-incremental-fsync yes - -# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good -# idea to start with the default settings and only change them after investigating -# how to improve the performances and how the keys LFU change over time, which -# is possible to inspect via the OBJECT FREQ command. -# -# There are two tunable parameters in the Redis LFU implementation: the -# counter logarithm factor and the counter decay time. It is important to -# understand what the two parameters mean before changing them. -# -# The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis -# uses a probabilistic increment with logarithmic behavior. Given the value -# of the old counter, when a key is accessed, the counter is incremented in -# this way: -# -# 1. A random number R between 0 and 1 is extracted. -# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). -# 3. The counter is incremented only if R < P. -# -# The default lfu-log-factor is 10. This is a table of how the frequency -# counter changes with a different number of accesses with different -# logarithmic factors: -# -# +--------+------------+------------+------------+------------+------------+ -# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | -# +--------+------------+------------+------------+------------+------------+ -# | 0 | 104 | 255 | 255 | 255 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# | 1 | 18 | 49 | 255 | 255 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# | 10 | 10 | 18 | 142 | 255 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# | 100 | 8 | 11 | 49 | 143 | 255 | -# +--------+------------+------------+------------+------------+------------+ -# -# NOTE: The above table was obtained by running the following commands: -# -# redis-benchmark -n 1000000 incr foo -# redis-cli object freq foo -# -# NOTE 2: The counter initial value is 5 in order to give new objects a chance -# to accumulate hits. -# -# The counter decay time is the time, in minutes, that must elapse in order -# for the key counter to be divided by two (or decremented if it has a value -# less <= 10). -# -# The default value for the lfu-decay-time is 1. A Special value of 0 means to -# decay the counter every time it happens to be scanned. -# -# lfu-log-factor 10 -# lfu-decay-time 1 - -########################### ACTIVE DEFRAGMENTATION ####################### -# -# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested -# even in production and manually tested by multiple engineers for some -# time. -# -# What is active defragmentation? -# ------------------------------- -# -# Active (online) defragmentation allows a Redis server to compact the -# spaces left between small allocations and deallocations of data in memory, -# thus allowing to reclaim back memory. -# -# Fragmentation is a natural process that happens with every allocator (but -# less so with Jemalloc, fortunately) and certain workloads. Normally a server -# restart is needed in order to lower the fragmentation, or at least to flush -# away all the data and create it again. However thanks to this feature -# implemented by Oran Agra for Redis 4.0 this process can happen at runtime -# in an "hot" way, while the server is running. 
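Looking back at the LFU counter math above, the probabilistic increment is easy to simulate. This sketch follows the simplified formula from the comment (real Redis subtracts the initial value of 5 before computing P, so the numbers only approximate the table):

import random

def lfu_incr(counter, lfu_log_factor=10):
    # P = 1 / (old_value * lfu_log_factor + 1); the 8-bit counter caps at 255.
    if counter < 255 and random.random() < 1.0 / (counter * lfu_log_factor + 1):
        counter += 1
    return counter

c = 5                        # new keys start at 5 (see NOTE 2 above)
for _ in range(1000000):     # roughly the '1M hits' column
    c = lfu_incr(c)
print(c)                     # saturates near 255 for lfu-log-factor 10
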
-# -# Basically when the fragmentation is over a certain level (see the -# configuration options below) Redis will start to create new copies of the -# values in contiguous memory regions by exploiting certain specific Jemalloc -# features (in order to understand if an allocation is causing fragmentation -# and to allocate it in a better place), and at the same time, will release the -# old copies of the data. This process, repeated incrementally for all the keys -# will cause the fragmentation to drop back to normal values. -# -# Important things to understand: -# -# 1. This feature is disabled by default, and only works if you compiled Redis -# to use the copy of Jemalloc we ship with the source code of Redis. -# This is the default with Linux builds. -# -# 2. You never need to enable this feature if you don't have fragmentation -# issues. -# -# 3. Once you experience fragmentation, you can enable this feature when -# needed with the command "CONFIG SET activedefrag yes". -# -# The configuration parameters are able to fine tune the behavior of the -# defragmentation process. If you are not sure about what they mean it is -# a good idea to leave the defaults untouched. - -# Enabled active defragmentation -# activedefrag yes - -# Minimum amount of fragmentation waste to start active defrag -# active-defrag-ignore-bytes 100mb - -# Minimum percentage of fragmentation to start active defrag -# active-defrag-threshold-lower 10 - -# Maximum percentage of fragmentation at which we use maximum effort -# active-defrag-threshold-upper 100 - -# Minimal effort for defrag in CPU percentage -# active-defrag-cycle-min 25 - -# Maximal effort for defrag in CPU percentage -# active-defrag-cycle-max 75 - diff --git a/cache/run_redis.sh b/cache/run_redis.sh index b32bbdc..344430d 100755 --- a/cache/run_redis.sh +++ b/cache/run_redis.sh @@ -3,5 +3,4 @@ set -e set -x -../../redis/src/redis-server ./ris.conf -../../redis/src/redis-server ./prefixes.conf +../../redis/src/redis-server ./cache.conf diff --git a/cache/shutdown_redis.sh b/cache/shutdown_redis.sh index 9c45af7..0afce32 100755 --- a/cache/shutdown_redis.sh +++ b/cache/shutdown_redis.sh @@ -3,5 +3,4 @@ # set -e set -x -../../redis/src/redis-cli -s ./ris.sock shutdown -../../redis/src/redis-cli -s ./prefixes.sock shutdown +../../redis/src/redis-cli -s ./cache.sock shutdown diff --git a/old/StatsRipeText.py b/old/StatsRipeText.py deleted file mode 100644 index 9c32f61..0000000 --- a/old/StatsRipeText.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import logging -from abc import ABC - - -class RIPECaching(ABC): - - def __init__(self, sourceapp: str='bgpranking-ng', loglevel: int=logging.DEBUG): - self.sourceapp = sourceapp - self.hostname = 'stat.ripe.net' - self.port = 43 - self.__init_logger(loglevel) - - def __init_logger(self, loglevel): - self.logger = logging.getLogger('{}'.format(self.__class__.__name__)) - self.logger.setLevel(loglevel) diff --git a/old/initranking_RIPE.py b/old/initranking_RIPE.py deleted file mode 100644 index 6ece6b6..0000000 --- a/old/initranking_RIPE.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import logging -import json -from redis import Redis -import asyncio - -from .libs.StatsRipeText import RIPECaching -from ipaddress import ip_network - - -class ASNLookup(RIPECaching): - - def __init__(self, sourceapp: str='bgpranking-ng', loglevel: int=logging.DEBUG): - super().__init__(sourceapp, loglevel) - self.redis_cache = Redis(host='localhost', 
port=6382, db=0, decode_responses=True) - self.logger.debug('Starting ASN lookup cache') - - async def get_all_asns(self): - reader, writer = await asyncio.open_connection(self.hostname, self.port) - to_send = '-d ris-asns list_asns=true asn_types=o sourceapp={}\n'.format(self.sourceapp) - writer.write(to_send.encode()) - ris_asns = json.loads(await reader.read()) - all_asns = ris_asns['asns']['originating'] - if not all_asns: - self.logger.warning('No ASNs in ris-asns, something went wrong.') - else: - self.redis_cache.sadd('asns', *all_asns) - self.redis_cache.sadd('asns_to_lookup', *all_asns) - - def fix_ipv4_networks(self, networks): - '''Because we can't have nice things. - Some netorks come without the last(s) bytes (i.e. 170.254.25/24)''' - to_return = [] - for net in networks: - try: - to_return.append(ip_network(net)) - except ValueError: - ip, mask = net.split('/') - iplist = ip.split('.') - iplist = iplist + ['0'] * (4 - len(iplist)) - to_return.append(ip_network('{}/{}'.format('.'.join(iplist), mask))) - return to_return - - async def get_originating_prefixes(self): - reader, writer = await asyncio.open_connection(self.hostname, self.port) - writer.write(b'-k\n') - while True: - asn = self.redis_cache.spop('asns_to_lookup') - if not asn: - break - self.logger.debug('ASN lookup: {}'.format(asn)) - to_send = '-d ris-prefixes {} list_prefixes=true types=o af=v4,v6 noise=filter sourceapp={}\n'.format(asn, self.sourceapp) - writer.write(to_send.encode()) - try: - data = await reader.readuntil(b'\n}\n') - except asyncio.streams.LimitOverrunError: - self.logger.debug('ASN lookup failed: {}'.format(asn)) - self.redis_cache.sadd('asns_to_lookup', asn) - writer.close() - reader, writer = await asyncio.open_connection(self.hostname, self.port) - - ris_prefixes = json.loads(data) - p = self.redis_cache.pipeline() - if ris_prefixes['prefixes']['v4']['originating']: - self.logger.debug('{} has ipv4'.format(asn)) - fixed_networks = self.fix_ipv4_networks(ris_prefixes['prefixes']['v4']['originating']) - p.sadd('{}|v4'.format(asn), *[str(net) for net in fixed_networks]) - total_ipv4 = sum([net.num_addresses for net in fixed_networks]) - p.set('{}|v4|ipcount'.format(asn), total_ipv4) - if ris_prefixes['prefixes']['v6']['originating']: - self.logger.debug('{} has ipv6'.format(asn)) - p.sadd('{}|v6'.format(asn), *ris_prefixes['prefixes']['v6']['originating']) - total_ipv6 = sum([ip_network(prefix).num_addresses for prefix in ris_prefixes['prefixes']['v6']['originating']]) - p.set('{}|v4|ipcount'.format(asn), total_ipv6) - p.execute() - writer.write(b'-k\n') - writer.close() diff --git a/old/risfetcher_RIPE.py b/old/risfetcher_RIPE.py deleted file mode 100644 index c838101..0000000 --- a/old/risfetcher_RIPE.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import logging -import json -from redis import Redis - -from .libs.StatsRipeText import RIPECaching -import asyncio - - -class RISPrefixLookup(RIPECaching): - - def __init__(self, sourceapp: str='bgpranking-ng', loglevel: int=logging.DEBUG): - super().__init__(sourceapp, loglevel) - self.logger.debug('Starting RIS Prefix fetcher') - - def cache_prefix(self, redis_cache, ip, network_info, prefix_overview): - prefix = network_info['prefix'] - asns = network_info['asns'] - # description = prefix_overview['block']['desc'] - # if not description: - # description = prefix_overview['block']['name'] - p = redis_cache.pipeline() - for asn in asns: - p.hmset(ip, {'asn': asn, 'prefix': prefix}) # , 'description': 
description}) - p.expire(ip, 43200) # 12H - p.execute() - - async def run(self): - redis_cache = Redis(host='localhost', port=6581, db=0, decode_responses=True) - reader, writer = await asyncio.open_connection(self.hostname, self.port) - - writer.write(b'-k\n') - while True: - ip = redis_cache.spop('for_ris_lookup') - if not ip: # TODO: add a check against something to stop the loop - self.logger.debug('Nothing to lookup') - await asyncio.sleep(10) - continue - if redis_cache.exists(ip): - self.logger.debug('Already cached: {}'.format(ip)) - continue - self.logger.debug('RIS lookup: {}'.format(ip)) - to_send = '-d network-info {} sourceapp={}\n'.format(ip, self.sourceapp) - writer.write(to_send.encode()) - data = await reader.readuntil(b'\n}\n') - network_info = json.loads(data) - if not network_info.get('prefix'): - self.logger.warning('The IP {} does not seem to be announced'.format(ip)) - continue - # self.logger.debug('Prefix lookup: {}'.format(ip)) - # to_send = '-d prefix-overview {} sourceapp={}\n'.format(network_info['prefix'], self.sourceapp) - # writer.write(to_send.encode()) - # data = await reader.readuntil(b'\n}\n') - # prefix_overview = json.loads(data) - # self.logger.debug('RIS cache prefix info: {}'.format(ip)) - # self.cache_prefix(redis_cache, ip, network_info, prefix_overview) - self.cache_prefix(redis_cache, ip, network_info, {}) - writer.write(b'-k\n') - writer.close() diff --git a/requirements.txt b/requirements.txt index 0a51ca2..b82c1ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,6 @@ git+https://github.com/MISP/PyTaxonomies git+https://github.com/MISP/PyMISPGalaxies.git beautifulsoup4 + +# IPASN web client +git+https://github.com/D4-project/IPASN-History.git/#egg=pyipasnhistory&subdirectory=client diff --git a/setup.py b/setup.py index 56748bc..d9eecf6 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ setup( description='BGP Ranking, the new one..', packages=['bgpranking'], scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py', - 'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py', + 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py', 'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'], classifiers=[ 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
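For context on the requirements.txt addition above: the pyipasnhistory client is what stands in for the deleted RIS lookup machinery. A hedged sketch of a single lookup follows; the class name comes from the package pulled in by requirements.txt, but treat the exact method signature and the URL as assumptions to verify against your deployment.

from pyipasnhistory import IPASNHistory

# Placeholder URL: point this at wherever your IPASN History instance listens.
ipasn = IPASNHistory(root_url='http://127.0.0.1:5176/')
response = ipasn.query('8.8.8.8', source='caida', address_family='v4')
print(response)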