new: Add Shadowserver module

parent faa3c634f2
commit 6045635b72
@@ -110,3 +110,11 @@ rawdata
 storage/ardb.pid
 storage/data
 storage/repl
+
+# Config file for shadowserver, with password
+bgpranking/config/shadowserver.json
+# The shadowserver config files are dynamically generated
+bgpranking/config/modules/shadowserver_*.json
+
+# Do not store the d3 lib in the repo
+website/web/static/d3*.js
@@ -10,6 +10,7 @@ from .libs.helpers import long_sleep, shutdown_requested
 class AbstractManager(ABC):
 
     def __init__(self, loglevel: int=logging.DEBUG):
+        self.loglevel = loglevel
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
         self.logger.info(f'Initializing {self.__class__.__name__}')
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set -e
+set -x
+
+find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete
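The new helper script above removes every .json file at the top level of the directory it runs in, except the shadowserver*.json ones (case-insensitive match). A rough Python equivalent, shown only to illustrate what the find invocation does:

    # Rough Python equivalent of the find one-liner above (illustrative only);
    # assumes it runs from the same directory the script targets.
    from pathlib import Path

    for entry in Path('.').iterdir():
        if entry.is_file() and entry.suffix == '.json' and not entry.name.lower().startswith('shadowserver'):
            entry.unlink()  # drop every top-level .json except shadowserver*.json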
@@ -0,0 +1,4 @@
+{
+  "user": "[USERNAME]",
+  "password": "[PASSWORD]"
+}
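This template only stores the Shadowserver portal credentials; bin/ssfetcher.py (added further down in this commit) loads it and hands the values to ShadowServerFetcher. A minimal sketch of that flow, assuming the file lives at bgpranking/config/shadowserver.json as suggested by the .gitignore entry above:

    # Minimal sketch of how the credentials file is consumed (see ssfetcher.py below);
    # the path is an assumption based on the .gitignore entry added in this commit.
    import json
    from pathlib import Path

    with open(Path('bgpranking') / 'config' / 'shadowserver.json') as f:
        ss_config = json.load(f)
    user, password = ss_config['user'], ss_config['password']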
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import aiohttp
@@ -112,10 +112,10 @@ class Fetcher():
             self.logger.debug('New list, no historical files')
             # nothing has been downloaded ever, moving on
             return False
+        dl_hash = sha512(downloaded)
         for last_file in to_check:
             with last_file.open('rb') as f:
                 last_hash = sha512(f.read())
-            dl_hash = sha512(downloaded)
             if (dl_hash.digest() == last_hash.digest() and
                     parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                 self.logger.debug('Same file already downloaded today.')
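The hunk above hoists the SHA-512 of the freshly downloaded payload out of the comparison loop so it is computed once instead of once per archived file. A standalone sketch of the same dedup idea (the function name and directory layout are illustrative, not the module's API):

    # Standalone sketch of the hash-based duplicate check: skip the new payload if an
    # identical file was already archived today. Names here are illustrative only.
    from datetime import date
    from hashlib import sha512
    from pathlib import Path

    from dateutil import parser

    def already_downloaded_today(downloaded: bytes, archive_dir: Path) -> bool:
        dl_hash = sha512(downloaded)  # hash the new payload once, outside the loop
        for last_file in archive_dir.glob('*.txt'):
            last_hash = sha512(last_file.read_bytes())
            if (dl_hash.digest() == last_hash.digest()
                    and parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                return True
        return False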
@@ -11,7 +11,7 @@ from uuid import uuid4
 from io import BytesIO
 import importlib
 
-from typing import List
+from typing import List, Union, Tuple
 import types
 
 from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path
@@ -20,7 +20,7 @@ from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path
 class RawFilesParser():
 
     def __init__(self, config_file: Path, storage_directory: Path,
-                 loglevel: int=logging.DEBUG):
+                 loglevel: int=logging.DEBUG) -> None:
         with open(config_file, 'r') as f:
             module_parameters = json.load(f)
         self.vendor = module_parameters['vendor']
@@ -36,7 +36,7 @@ class RawFilesParser():
         self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0)
         self.logger.debug(f'Starting intake on {self.source}')
 
-    def __init_logger(self, loglevel):
+    def __init_logger(self, loglevel) -> None:
         self.logger = logging.getLogger(f'{self.__class__.__name__}-{self.vendor}-{self.listname}')
         self.logger.setLevel(loglevel)
 
@@ -52,12 +52,12 @@ class RawFilesParser():
         Only run it when needed, it is nasty and slow'''
         return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]
 
-    def parse_raw_file(self, f: BytesIO):
+    def parse_raw_file(self, f: BytesIO) -> List[bytes]:
         # If the list doesn't provide a time, fallback to current day, midnight
         self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
         return self.extract_ipv4(f.getvalue())
 
-    def parse_raw_files(self):
+    def parse_raw_files(self) -> None:
         set_running(f'{self.__class__.__name__}-{self.source}')
         nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()])
         if nb_unparsable_files:
@@ -69,9 +69,13 @@ class RawFilesParser():
                     to_parse = BytesIO(f.read())
                 p = self.redis_intake.pipeline()
                 for ip in self.parse_raw_file(to_parse):
+                    if isinstance(ip, tuple):
+                        ip, datetime = ip
+                    else:
+                        datetime = self.datetime
                     uuid = uuid4()
                     p.hmset(uuid, {'ip': ip, 'source': self.source,
-                                   'datetime': self.datetime.isoformat()})
+                                   'datetime': datetime.isoformat()})
                     p.sadd('intake', uuid)
                 p.execute()
                 self._archive(filepath)
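With this hunk a vendor parser may yield either bare IPs (stamped with the list-level self.datetime) or (ip, timestamp) tuples for feeds that carry a timestamp per row, such as the Shadowserver CSVs added later in this commit. A self-contained sketch of the dispatch, with made-up values:

    # Sketch of the per-entry timestamp handling above; the entries and the default
    # timestamp are invented for illustration.
    from datetime import datetime

    default_dt = datetime(2018, 4, 10)  # stands in for self.datetime
    entries = ['198.51.100.23', ('203.0.113.7', datetime(2018, 4, 9, 23, 55))]

    for entry in entries:
        if isinstance(entry, tuple):
            ip, when = entry              # the feed provided its own timestamp
        else:
            ip, when = entry, default_dt  # fall back to the list-level timestamp
        print(ip, when.isoformat())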
@@ -81,10 +85,10 @@ class RawFilesParser():
         finally:
             unset_running(f'{self.__class__.__name__}-{self.source}')
 
-    def _archive(self, filepath: Path):
+    def _archive(self, filepath: Path) -> None:
         '''After processing, move file to the archive directory'''
         filepath.rename(self.directory / 'archive' / filepath.name)
 
-    def _unparsable(self, filepath: Path):
+    def _unparsable(self, filepath: Path) -> None:
         '''After processing, move file to the archive directory'''
         filepath.rename(self.unparsable_dir / filepath.name)
@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
 
+from typing import List
+
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# Generated on (.*)#\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
 
+from typing import List
+
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'## Feed generated at: (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -1,14 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from io import BytesIO
-from datetime import datetime
-
-from ..simple_feed_fetcher import RawFileImporter
-
-
-class DefaultImporter(RawFileImporter):
-
-    def parse_raw_file(self, f: BytesIO):
-        self.datetime = datetime.now()
-        return self.extract_ipv4(f.getvalue())
@@ -4,9 +4,10 @@
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
 
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0])
     iplist = self.extract_ipv4(f.getvalue())
     # The IPs have leading 0s. Getting rid of them directly here.
@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
 
+from typing import List
+
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -1,11 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
 
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from dateutil.parser import parse
+from csv import DictReader
+from io import BytesIO, StringIO
+from typing import Tuple, Generator
+from datetime import datetime
+
+
+def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]:
+    default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+    reader = DictReader(StringIO(f.getvalue().decode()))
+    for row in reader:
+        if 'timestamp' in row:
+            ts = parse(row['timestamp'])
+        else:
+            ts = default_ts
+
+        if 'ip' in row:
+            ip = row['ip']
+        elif 'src_ip' in row:
+            # For sinkhole6_http
+            ip = row['src_ip']
+        else:
+            self.logger.critical(f'No IPs in the list {self.source}.')
+            break
+        yield ip, ts
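The new Shadowserver parser treats the report as CSV and yields (ip, timestamp) pairs, falling back to src_ip for the sinkhole6_http feed. A quick usage sketch with an invented two-row report; the stub object only supplies the source and logger attributes the function expects, and parse_raw_file is assumed to be imported from the parser module above:

    # Usage sketch for the generator above; the CSV payload and stub are invented.
    import logging
    from io import BytesIO

    class _Stub:
        source = 'shadowserver-demo'
        logger = logging.getLogger('demo')

    sample = (b'timestamp,ip,port\n'
              b'2018-04-10 00:04:35,198.51.100.23,23\n'
              b'2018-04-10 00:09:12,203.0.113.7,2323\n')

    for ip, ts in parse_raw_file(_Stub(), BytesIO(sample)):
        print(ip, ts.isoformat())
    # 198.51.100.23 2018-04-10T00:04:35
    # 203.0.113.7 2018-04-10T00:09:12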
@@ -4,7 +4,7 @@
 import logging
 from redis import StrictRedis
 from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files
-from datetime import date
+from datetime import datetime, date, timedelta
 from ipaddress import ip_network
 from pathlib import Path
 
@@ -22,40 +22,34 @@ class Ranking():
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
 
-    def compute(self):
-        self.logger.info('Start ranking')
-        set_running(self.__class__.__name__)
-        today = date.today().isoformat()
+    def rank_a_day(self, day: str):
+        # FIXME: If we want to rank an older date, we need to have older datasets for the announces
         v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
-        if not v4_last or not v6_last:
-            '''Failsafe if asn_meta has not been populated yet'''
-            unset_running(self.__class__.__name__)
-            return
-        asns_aggregation_key_v4 = f'{today}|asns|v4'
-        asns_aggregation_key_v6 = f'{today}|asns|v6'
+        asns_aggregation_key_v4 = f'{day}|asns|v4'
+        asns_aggregation_key_v6 = f'{day}|asns|v6'
         to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6])
         r_pipeline = self.ranking.pipeline()
-        for source in self.storage.smembers(f'{today}|sources'):
-            self.logger.info(f'{today} - Ranking source: {source}')
-            source_aggregation_key_v4 = f'{today}|{source}|asns|v4'
-            source_aggregation_key_v6 = f'{today}|{source}|asns|v6'
+        for source in self.storage.smembers(f'{day}|sources'):
+            self.logger.info(f'{day} - Ranking source: {source}')
+            source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
+            source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
             to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
-            for asn in self.storage.smembers(f'{today}|{source}'):
-                prefixes_aggregation_key_v4 = f'{today}|{asn}|v4'
-                prefixes_aggregation_key_v6 = f'{today}|{asn}|v6'
+            for asn in self.storage.smembers(f'{day}|{source}'):
+                prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
+                prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
                 to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
                 if asn == '0':
                     # Default ASN when no matches. Probably spoofed.
                     continue
-                self.logger.debug(f'{today} - Ranking source: {source} / ASN: {asn}')
+                self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
                 asn_rank_v4 = 0.0
                 asn_rank_v6 = 0.0
-                for prefix in self.storage.smembers(f'{today}|{source}|{asn}'):
+                for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
                     ips = set([ip_ts.split('|')[0]
-                               for ip_ts in self.storage.smembers(f'{today}|{source}|{asn}|{prefix}')])
+                               for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')])
                     py_prefix = ip_network(prefix)
                     prefix_rank = float(len(ips)) / py_prefix.num_addresses
-                    r_pipeline.zadd(f'{today}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
+                    r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                     if py_prefix.version == 4:
                         asn_rank_v4 += len(ips) * self.config_files[source]['impact']
                         r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix, prefix_rank * self.config_files[source]['impact'])
@@ -66,17 +60,32 @@ class Ranking():
                 if v4count:
                     asn_rank_v4 /= float(v4count)
                     if asn_rank_v4:
-                        r_pipeline.set(f'{today}|{source}|{asn}|v4', asn_rank_v4)
+                        r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
                         r_pipeline.zincrby(asns_aggregation_key_v4, asn, asn_rank_v4)
                         r_pipeline.zadd(source_aggregation_key_v4, asn_rank_v4, asn)
                 if v6count:
                     asn_rank_v6 /= float(v6count)
                     if asn_rank_v6:
-                        r_pipeline.set(f'{today}|{source}|{asn}|v6', asn_rank_v6)
+                        r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
                         r_pipeline.zincrby(asns_aggregation_key_v6, asn, asn_rank_v6)
                         r_pipeline.zadd(source_aggregation_key_v6, asn_rank_v4, asn)
         self.ranking.delete(*to_delete)
         r_pipeline.execute()
 
+    def compute(self):
+        self.logger.info('Start ranking')
+        set_running(self.__class__.__name__)
+        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
+        if not v4_last or not v6_last:
+            '''Failsafe if asn_meta has not been populated yet'''
+            unset_running(self.__class__.__name__)
+            return
+        today = date.today()
+        now = datetime.now()
+        today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
+        if now < today12am:
+            # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
+            self.rank_a_day((today - timedelta(days=1)).isoformat())
+        self.rank_a_day(today.isoformat())
         unset_running(self.__class__.__name__)
         self.logger.info('Ranking done.')
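To make the arithmetic concrete: a prefix's rank is the number of distinct reported IPs divided by the prefix size, and the ASN rank accumulates reported-IP counts weighted by the source's impact before being divided by v4count / v6count, which are set outside the hunks shown here. A worked example with invented numbers, under the assumption that v4count is the ASN's announced IPv4 address count:

    # Worked example of the ranking formulas above; all numbers are invented and
    # v4count is assumed to be the ASN's announced IPv4 address count.
    from ipaddress import ip_network

    impact = 5                                      # e.g. a blacklist-type source
    prefix = ip_network('203.0.113.0/24')
    ips = {'203.0.113.1', '203.0.113.7', '203.0.113.9'}

    prefix_rank = len(ips) / prefix.num_addresses   # 3 / 256 ~= 0.0117
    asn_rank_v4 = len(ips) * impact                 # 15.0, accumulated over prefixes
    v4count = 512                                   # assumed announced IPv4 space
    asn_rank_v4 /= float(v4count)                   # ~= 0.0293
    print(round(prefix_rank, 4), round(asn_rank_v4, 4))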
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import aiohttp
+import logging
+from bs4 import BeautifulSoup
+from dateutil.parser import parse
+from typing import Tuple
+from datetime import datetime, date
+from pathlib import Path
+from .libs.helpers import safe_create_dir, set_running, unset_running
+import json
+
+
+class ShadowServerFetcher():
+
+    def __init__(self, user, password, config_path_modules: Path, storage_directory: Path,
+                 loglevel: int=logging.DEBUG) -> None:
+        self.__init_logger(loglevel)
+        self.storage_directory = storage_directory
+        self.config_path_modules = config_path_modules
+        self.user = user
+        self.password = password
+        self.index_page = 'https://dl.shadowserver.org/reports/index.php'
+        self.vendor = 'shadowserver'
+        self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone',
+                                 'microsoft', 'scan', 'sinkhole6', 'sinkhole')
+        self.first_available_day = None
+        self.last_available_day = None
+        self.available_entries = {}
+
+    def __init_logger(self, loglevel):
+        self.logger = logging.getLogger(f'{self.__class__.__name__}')
+        self.logger.setLevel(loglevel)
+
+    async def __get_index(self):
+        auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
+        async with aiohttp.ClientSession() as s:
+            self.logger.debug('Fetching the index.')
+            async with s.post(self.index_page, data=auth_details) as r:
+                return await r.text()
+
+    async def __build_daily_dict(self):
+        html_index = await self.__get_index()
+        soup = BeautifulSoup(html_index, 'html.parser')
+        treeview = soup.find(id='treemenu1')
+        for y in treeview.select('> li'):
+            year = y.contents[0]
+            for m in y.contents[1].select('> li'):
+                month = m.contents[0]
+                for d in m.contents[1].select('> li'):
+                    day = d.contents[0]
+                    date = parse(f'{year} {month} {day}').date()
+                    self.available_entries[date.isoformat()] = []
+                    for a in d.contents[1].find_all('a', href=True):
+                        if not self.first_available_day:
+                            self.first_available_day = date
+                        self.last_available_day = date
+                        self.available_entries[date.isoformat()].append((a['href'], a.string))
+        self.logger.debug('Dictionary created.')
+
+    def __normalize_day(self, day: Tuple[str, date, datetime]=None) -> str:
+        if not day:
+            if not self.last_available_day:
+                raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
+            day = self.last_available_day
+        else:
+            if isinstance(day, str):
+                day = parse(day).date()
+            elif isinstance(day, datetime):
+                day = day.date()
+        return day.isoformat()
+
+    def __split_name(self, name):
+        type_content, country, list_type = name.split('-')
+        if '_' in type_content:
+            type_content, details_type = type_content.split('_', maxsplit=1)
+            if '_' in details_type:
+                details_type, sub = details_type.split('_')
+                return list_type, country, (type_content, details_type, sub)
+            return list_type, country, (type_content, details_type)
+        return list_type, country, (type_content)
+
+    def __check_config(self, filename: str) -> Path:
+        self.logger.debug(f'Working on config for {filename}.')
+        config = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
+        type_content, _, type_details = self.__split_name(filename)
+        prefix = type_content.split('.')[0]
+        config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))
+
+        main_type = type_details[0]
+        if main_type not in self.known_list_types:
+            self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
+            return None
+
+        if main_type == 'blacklist':
+            config['impact'] = 5
+        elif main_type == 'botnet':
+            config['impact'] = 2
+        elif main_type == 'cc':
+            config['impact'] = 5
+        elif main_type == 'cisco':
+            config['impact'] = 3
+        elif main_type == 'cwsandbox':
+            config['impact'] = 5
+        elif main_type == 'drone':
+            config['impact'] = 2
+        elif main_type == 'microsoft':
+            config['impact'] = 3
+        elif main_type == 'scan':
+            config['impact'] = 1
+        elif main_type == 'sinkhole6':
+            config['impact'] = 2
+        elif main_type == 'sinkhole':
+            config['impact'] = 2
+        if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists():
+            self.logger.debug(f'Creating config file for {filename}.')
+            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f:
+                json.dump(config, f, indent=2)
+        else:
+            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f:
+                # Validate new config file with old
+                config_current = json.load(f)
+                if config_current != config:
+                    self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))
+        # Init list directory
+        directory = self.storage_directory / config['vendor'] / config['name']
+        safe_create_dir(directory)
+        meta = directory / 'meta'
+        safe_create_dir(meta)
+        archive_dir = directory / 'archive'
+        safe_create_dir(archive_dir)
+        self.logger.debug(f'Done with config for {filename}.')
+        return directory
+
+    async def download_daily_entries(self, day: Tuple[str, date, datetime]=None):
+        set_running(f'{self.__class__.__name__}')
+        await self.__build_daily_dict()
+        for url, filename in self.available_entries[self.__normalize_day(day)]:
+            storage_dir = self.__check_config(filename)
+            if not storage_dir:
+                continue
+            # Check if the file we're trying to download has already been downloaded. Skip if True.
+            uuid = url.split('/')[-1]
+            if (storage_dir / 'meta' / 'last_download').exists():
+                with open(storage_dir / 'meta' / 'last_download') as f:
+                    last_download_uuid = f.read()
+                if last_download_uuid == uuid:
+                    self.logger.debug(f'Already downloaded: {url}.')
+                    continue
+            async with aiohttp.ClientSession() as s:
+                async with s.get(url) as r:
+                    self.logger.info(f'Downloading {url}.')
+                    content = await r.content.read()
+            with (storage_dir / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
+                f.write(content)
+            with open(storage_dir / 'meta' / 'last_download', 'w') as f:
+                f.write(uuid)
+        unset_running(f'{self.__class__.__name__}')
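__check_config above derives one module config per Shadowserver report name and drops it into the modules config directory when it is missing, which is what the dynamically generated shadowserver_*.json entries in .gitignore refer to. Based on that code, a generated file would look roughly like this; the name value depends on the actual report filename and is only a placeholder:

    # Illustrative shape of a generated shadowserver_<name>.json module config,
    # following __check_config above; 'name' is a placeholder, not a real report name.
    import json

    config = {
        'vendor': 'shadowserver',
        'parser': '.parsers.shadowserver',
        'name': '<prefix>-<type_details_joined_by_underscores>',
        'impact': 1,  # e.g. a 'scan' report; see the impact table above
    }
    print(json.dumps(config, indent=2))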
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.archive import DeepArchive
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -21,12 +21,18 @@ class ModulesManager(AbstractManager):
         if not config_dir:
             config_dir = get_config_path()
         if not storage_directory:
-            storage_directory = get_homedir() / 'rawdata'
-        modules_config = config_dir / 'modules'
-        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
-        self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths]
+            self.storage_directory = get_homedir() / 'rawdata'
+        self.modules_config = config_dir / 'modules'
+        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
+        self.modules = [Fetcher(path, self.storage_directory, loglevel) for path in self.modules_paths]
 
     def _to_run_forever(self):
+        # Check if there are new config files
+        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
+        self.modules += [Fetcher(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
+        self.modules_paths += new_modules_paths
+
+        if self.modules:
             loop = asyncio.get_event_loop()
             try:
                 loop.run_until_complete(asyncio.gather(
@@ -35,6 +41,8 @@ class ModulesManager(AbstractManager):
                 )
             except aiohttp.client_exceptions.ClientConnectorError as e:
                 self.logger.critical(f'Exception while fetching lists: {e}')
+        else:
+            self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.')
 
 
 if __name__ == '__main__':
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.monitor import Monitor
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -19,13 +19,21 @@ class ParserManager(AbstractManager):
         if not config_dir:
             config_dir = get_config_path()
         if not storage_directory:
-            storage_directory = get_homedir() / 'rawdata'
-        modules_config = config_dir / 'modules'
-        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
-        self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths]
+            self.storage_directory = get_homedir() / 'rawdata'
+        self.modules_config = config_dir / 'modules'
+        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
+        self.modules = [RawFilesParser(path, self.storage_directory, loglevel) for path in self.modules_paths]
 
     def _to_run_forever(self):
+        # Check if there are new config files
+        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
+        self.modules += [RawFilesParser(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
+        self.modules_paths += new_modules_paths
+
+        if self.modules:
             [module.parse_raw_files() for module in self.modules]
+        else:
+            self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.')
 
 
 if __name__ == '__main__':
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.libs.helpers import get_homedir, check_running
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.libs.helpers import is_running, get_socket_path
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+import json
+import asyncio
+from pathlib import Path
+import aiohttp
+
+from bgpranking.abstractmanager import AbstractManager
+from bgpranking.shadowserverfetcher import ShadowServerFetcher
+from bgpranking.libs.helpers import get_config_path, get_homedir
+
+logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
+                    level=logging.INFO, datefmt='%I:%M:%S')
+
+
+class ShadowServerManager(AbstractManager):
+
+    def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
+        super().__init__(loglevel)
+        self.config = True
+        if not config_dir:
+            config_dir = get_config_path()
+        if not (config_dir / 'shadowserver.json').exists():
+            self.config = False
+            self.logger.warning(f'No config file available, the shadow server module will not be launched.')
+            return
+        with open(config_dir / 'shadowserver.json') as f:
+            ss_config = json.load(f)
+        if not storage_directory:
+            storage_directory = get_homedir() / 'rawdata'
+        modules_config = config_dir / 'modules'
+        self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], modules_config, storage_directory, loglevel)
+
+    def _to_run_forever(self):
+        loop = asyncio.get_event_loop()
+        try:
+            loop.run_until_complete(self.fetcher.download_daily_entries())
+        except aiohttp.client_exceptions.ClientConnectorError as e:
+            self.logger.critical(f'Exception while fetching Shadow Server lists: {e}')
+
+
+if __name__ == '__main__':
+    modules_manager = ShadowServerManager()
+    if modules_manager.config:
+        modules_manager.run(sleep_in_sec=3600)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from subprocess import Popen
@@ -13,6 +13,7 @@ if __name__ == '__main__':
     Popen(['loadprefixes.py'])
     Popen(['rislookup.py'])
    Popen(['fetcher.py'])
+    Popen(['ssfetcher.py'])
     Popen(['parser.py'])
     Popen(['sanitizer.py'])
     Popen(['dbinsert.py'])
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from subprocess import Popen
setup.py (2 changes)
@@ -13,7 +13,7 @@ setup(
     description='BGP Ranking, the new one..',
     packages=['bgpranking'],
     scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
-             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py',
+             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py',
              'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
     classifiers=[
         'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',