new: Add Shadowserver module

parent faa3c634f2
commit 6045635b72

@@ -110,3 +110,11 @@ rawdata
storage/ardb.pid
storage/data
storage/repl

# Shadow Server config file with password
bgpranking/config/shadowserver.json
# The shadow server config files are dynamically generated
bgpranking/config/modules/shadowserver_*.json

# Do not store the d3 lib in the repo
website/web/static/d3*.js

@@ -10,6 +10,7 @@ from .libs.helpers import long_sleep, shutdown_requested
class AbstractManager(ABC):

    def __init__(self, loglevel: int=logging.DEBUG):
        self.loglevel = loglevel
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)
        self.logger.info(f'Initializing {self.__class__.__name__}')
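
The one line added here keeps the loglevel on the instance, so the long-running managers can hand it to workers they create later. A minimal sketch of the subclass contract, assuming only what the bin/ scripts further down in this diff use (FooManager and its body are invented for illustration):

```python
# Minimal sketch of how a manager subclass uses AbstractManager; FooManager is
# hypothetical, but _to_run_forever() and run(sleep_in_sec=...) match the
# bin/ scripts later in this diff.
import logging

from bgpranking.abstractmanager import AbstractManager


class FooManager(AbstractManager):

    def __init__(self, loglevel: int=logging.INFO):
        super().__init__(loglevel)
        # self.loglevel is now stored by the base class, so workers created
        # later (see the fetcher/parser managers below) can reuse it.

    def _to_run_forever(self):
        self.logger.info('One unit of work per cycle.')


if __name__ == '__main__':
    FooManager().run(sleep_in_sec=3600)
```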

@@ -0,0 +1,6 @@
#!/bin/bash

set -e
set -x

find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete

@@ -0,0 +1,4 @@
{
  "user": "[USERNAME]",
  "password": "[PASSWORD]"
}
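
These two keys are read as-is by the Shadow Server manager. A minimal sketch of the load, assuming the file sits in bgpranking/config/ as the .gitignore entry above suggests:

```python
# Minimal sketch of how the credentials template is consumed (distilled from
# bin/ssfetcher.py further down; the config_dir path is an assumption).
import json
from pathlib import Path

config_dir = Path('bgpranking') / 'config'
with open(config_dir / 'shadowserver.json') as f:
    ss_config = json.load(f)

user, password = ss_config['user'], ss_config['password']
```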

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import aiohttp

@@ -112,10 +112,10 @@ class Fetcher():
            self.logger.debug('New list, no hisorical files')
            # nothing has been downloaded ever, moving on
            return False
        dl_hash = sha512(downloaded)
        for last_file in to_check:
            with last_file.open('rb') as f:
                last_hash = sha512(f.read())
                dl_hash = sha512(downloaded)
            if (dl_hash.digest() == last_hash.digest() and
                    parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                self.logger.debug('Same file already downloaded today.')
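
The change in this hunk hashes the freshly downloaded payload once, before the loop over historical files, instead of re-hashing it on every comparison. A distilled, standalone sketch of the check (to_check, downloaded and the date-prefixed filenames are assumptions taken from the surrounding code, not a verbatim copy of Fetcher):

```python
# Hedged sketch of the duplicate-download check.
from datetime import date
from hashlib import sha512
from pathlib import Path

from dateutil import parser


def same_as_last(downloaded: bytes, to_check: list) -> bool:
    if not to_check:
        # nothing has been downloaded ever, moving on
        return False
    dl_hash = sha512(downloaded)  # hash the new payload once, outside the loop
    for last_file in to_check:
        last_hash = sha512(Path(last_file).read_bytes())
        if (dl_hash.digest() == last_hash.digest()
                and parser.parse(Path(last_file).name.split('.')[0]).date() == date.today()):
            return True  # identical content already stored today
    return False
```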

@@ -11,7 +11,7 @@ from uuid import uuid4
from io import BytesIO
import importlib

from typing import List
from typing import List, Union, Tuple
import types

from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path

@@ -20,7 +20,7 @@ from .libs.helpers import safe_create_dir, set_running, unset_running, get_socke
class RawFilesParser():

    def __init__(self, config_file: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG):
                 loglevel: int=logging.DEBUG) -> None:
        with open(config_file, 'r') as f:
            module_parameters = json.load(f)
        self.vendor = module_parameters['vendor']

@@ -36,7 +36,7 @@ class RawFilesParser():
        self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0)
        self.logger.debug(f'Starting intake on {self.source}')

    def __init_logger(self, loglevel):
    def __init_logger(self, loglevel) -> None:
        self.logger = logging.getLogger(f'{self.__class__.__name__}-{self.vendor}-{self.listname}')
        self.logger.setLevel(loglevel)

@@ -52,12 +52,12 @@ class RawFilesParser():
        Only run it when needed, it is nasty and slow'''
        return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]

    def parse_raw_file(self, f: BytesIO):
    def parse_raw_file(self, f: BytesIO) -> List[bytes]:
        # If the list doesn't provide a time, fallback to current day, midnight
        self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        return self.extract_ipv4(f.getvalue())

    def parse_raw_files(self):
    def parse_raw_files(self) -> None:
        set_running(f'{self.__class__.__name__}-{self.source}')
        nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()])
        if nb_unparsable_files:
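
The list comprehension above is what later parsers (the dshield one further down refers to it) rely on to strip leading zeros from dotted quads; a standalone illustration:

```python
# What the leading-zero cleanup above does, on made-up input:
ips = [b'010.001.002.003', b'192.168.001.010']
cleaned = ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]
print(cleaned)  # [b'10.1.2.3', b'192.168.1.10']
```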

@@ -69,9 +69,13 @@ class RawFilesParser():
                to_parse = BytesIO(f.read())
            p = self.redis_intake.pipeline()
            for ip in self.parse_raw_file(to_parse):
                if isinstance(ip, tuple):
                    ip, datetime = ip
                else:
                    datetime = self.datetime
                uuid = uuid4()
                p.hmset(uuid, {'ip': ip, 'source': self.source,
                               'datetime': self.datetime.isoformat()})
                               'datetime': datetime.isoformat()})
                p.sadd('intake', uuid)
            p.execute()
            self._archive(filepath)
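
With this hunk, parse_raw_file may either return a flat list of IPs (the classic parsers) or yield (ip, timestamp) tuples (the new shadowserver parser), and the intake loop stores the per-entry timestamp when one is provided. A small sketch of the two shapes the loop now accepts (feed contents invented):

```python
# Sketch of the two shapes parse_raw_file can now hand to the intake loop.
from datetime import datetime

list_style = [b'198.51.100.7', b'203.0.113.9']                   # classic parsers
tuple_style = [('198.51.100.7', datetime(2018, 3, 22, 14, 0))]   # shadowserver parser

default_dt = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

for entry in list_style + tuple_style:
    if isinstance(entry, tuple):
        ip, dt = entry               # per-entry timestamp from the feed itself
    else:
        ip, dt = entry, default_dt   # fall back to the parser-wide timestamp
    print(ip, dt.isoformat())
```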

@@ -81,10 +85,10 @@ class RawFilesParser():
        finally:
            unset_running(f'{self.__class__.__name__}-{self.source}')

    def _archive(self, filepath: Path):
    def _archive(self, filepath: Path) -> None:
        '''After processing, move file to the archive directory'''
        filepath.rename(self.directory / 'archive' / filepath.name)

    def _unparsable(self, filepath: Path):
    def _unparsable(self, filepath: Path) -> None:
        '''After processing, move file to the unparsable directory'''
        filepath.rename(self.unparsable_dir / filepath.name)

@@ -1,11 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# Generated on (.*)#\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())
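
These small per-feed parsers all follow the same pattern: pull the generation date out of a header line with a bytes regex, then hand the body to extract_ipv4. An isolated illustration of the date extraction (the header line is made up to match the regex above):

```python
# Standalone illustration of the header-date extraction used by these parsers.
import re
from dateutil.parser import parse

raw = b'# Generated on 2018-03-22 14:00:00#\n198.51.100.7\n203.0.113.9\n'
match = re.findall(b'# Generated on (.*)#\n', raw)[0]
ts = parse(match.decode())  # decoded for clarity; recent dateutil also accepts bytes
print(ts.isoformat())       # 2018-03-22T14:00:00
```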

@@ -1,11 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'## Feed generated at: (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())

@@ -1,14 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from io import BytesIO
from datetime import datetime

from ..simple_feed_fetcher import RawFileImporter


class DefaultImporter(RawFileImporter):

    def parse_raw_file(self, f: BytesIO):
        self.datetime = datetime.now()
        return self.extract_ipv4(f.getvalue())

@@ -4,9 +4,10 @@
from dateutil.parser import parse
import re
from io import BytesIO
from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0])
    iplist = self.extract_ipv4(f.getvalue())
    # The IPs have leading 0s. Getting rid of them directly here.

@@ -1,11 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())

@@ -1,11 +1,12 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List

def parse_raw_file(self, f: BytesIO):
def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())

@@ -0,0 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from dateutil.parser import parse
from csv import DictReader
from io import BytesIO, StringIO
from typing import Tuple, Generator
from datetime import datetime


def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]:
    default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    reader = DictReader(StringIO(f.getvalue().decode()))
    for row in reader:
        if 'timestamp' in row:
            ts = parse(row['timestamp'])
        else:
            ts = default_ts

        if 'ip' in row:
            ip = row['ip']
        elif 'src_ip' in row:
            # For sinkhole6_http
            ip = row['src_ip']
        else:
            self.logger.critical(f'No IPs in the list {self.source}.')
            break
        yield ip, ts
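
This parser is selected through the 'parser': '.parsers.shadowserver' entry written by the fetcher's config generator below, and, judging by the importlib/types imports added earlier, bound onto the parser instance at runtime. A rough usage sketch; the stub object, import path and CSV rows are invented purely to show the (ip, timestamp) generator contract:

```python
# Rough usage sketch; the import path and sample CSV are assumptions.
import logging
import types
from io import BytesIO

from bgpranking.parsers.shadowserver import parse_raw_file  # assumed module path


class _Stub:
    source = 'shadowserver-demo'
    logger = logging.getLogger('demo')


stub = _Stub()
stub.parse_raw_file = types.MethodType(parse_raw_file, stub)

sample = b'timestamp,ip\n2018-03-22 00:00:00,198.51.100.7\n2018-03-22 01:00:00,203.0.113.9\n'
for ip, ts in stub.parse_raw_file(BytesIO(sample)):
    print(ip, ts.isoformat())
```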

@@ -4,7 +4,7 @@
import logging
from redis import StrictRedis
from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files
from datetime import date
from datetime import datetime, date, timedelta
from ipaddress import ip_network
from pathlib import Path

@@ -22,40 +22,34 @@ class Ranking():
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    def compute(self):
        self.logger.info('Start ranking')
        set_running(self.__class__.__name__)
        today = date.today().isoformat()
    def rank_a_day(self, day: str):
        # FIXME: If we want to rank an older date, we need to have older datasets for the announces
        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
        if not v4_last or not v6_last:
            '''Failsafe if asn_meta has not been populated yet'''
            unset_running(self.__class__.__name__)
            return
        asns_aggregation_key_v4 = f'{today}|asns|v4'
        asns_aggregation_key_v6 = f'{today}|asns|v6'
        asns_aggregation_key_v4 = f'{day}|asns|v4'
        asns_aggregation_key_v6 = f'{day}|asns|v6'
        to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6])
        r_pipeline = self.ranking.pipeline()
        for source in self.storage.smembers(f'{today}|sources'):
            self.logger.info(f'{today} - Ranking source: {source}')
            source_aggregation_key_v4 = f'{today}|{source}|asns|v4'
            source_aggregation_key_v6 = f'{today}|{source}|asns|v6'
        for source in self.storage.smembers(f'{day}|sources'):
            self.logger.info(f'{day} - Ranking source: {source}')
            source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
            source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
            to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
            for asn in self.storage.smembers(f'{today}|{source}'):
                prefixes_aggregation_key_v4 = f'{today}|{asn}|v4'
                prefixes_aggregation_key_v6 = f'{today}|{asn}|v6'
            for asn in self.storage.smembers(f'{day}|{source}'):
                prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
                prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
                to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
                if asn == '0':
                    # Default ASN when no matches. Probably spoofed.
                    continue
                self.logger.debug(f'{today} - Ranking source: {source} / ASN: {asn}')
                self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
                asn_rank_v4 = 0.0
                asn_rank_v6 = 0.0
                for prefix in self.storage.smembers(f'{today}|{source}|{asn}'):
                for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
                    ips = set([ip_ts.split('|')[0]
                               for ip_ts in self.storage.smembers(f'{today}|{source}|{asn}|{prefix}')])
                               for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')])
                    py_prefix = ip_network(prefix)
                    prefix_rank = float(len(ips)) / py_prefix.num_addresses
                    r_pipeline.zadd(f'{today}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                    r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                    if py_prefix.version == 4:
                        asn_rank_v4 += len(ips) * self.config_files[source]['impact']
                        r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix, prefix_rank * self.config_files[source]['impact'])

@@ -66,17 +60,32 @@ class Ranking():
                if v4count:
                    asn_rank_v4 /= float(v4count)
                    if asn_rank_v4:
                        r_pipeline.set(f'{today}|{source}|{asn}|v4', asn_rank_v4)
                        r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
                        r_pipeline.zincrby(asns_aggregation_key_v4, asn, asn_rank_v4)
                        r_pipeline.zadd(source_aggregation_key_v4, asn_rank_v4, asn)
                if v6count:
                    asn_rank_v6 /= float(v6count)
                    if asn_rank_v6:
                        r_pipeline.set(f'{today}|{source}|{asn}|v6', asn_rank_v6)
                        r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
                        r_pipeline.zincrby(asns_aggregation_key_v6, asn, asn_rank_v6)
                        r_pipeline.zadd(source_aggregation_key_v6, asn_rank_v4, asn)
        self.ranking.delete(*to_delete)
        r_pipeline.execute()

    def compute(self):
        self.logger.info('Start ranking')
        set_running(self.__class__.__name__)
        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
        if not v4_last or not v6_last:
            '''Failsafe if asn_meta has not been populated yet'''
            unset_running(self.__class__.__name__)
            return
        today = date.today()
        now = datetime.now()
        today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
        if now < today12am:
            # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
            self.rank_a_day((today - timedelta(days=1)).isoformat())
        self.rank_a_day(today.isoformat())
        unset_running(self.__class__.__name__)
        self.logger.info('Ranking done.')
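
The heart of the ranking is unchanged by this hunk: a prefix's rank is the fraction of its address space seen in a feed, and the ASN aggregate is weighted by the feed's configured impact. A tiny worked example with invented numbers:

```python
# Worked example of the prefix_rank computation (all numbers invented).
from ipaddress import ip_network

ips_seen = {'198.51.100.7', '198.51.100.9', '198.51.100.200'}  # unique IPs from one source
impact = 5                                                     # e.g. a 'blacklist' feed

py_prefix = ip_network('198.51.100.0/24')
prefix_rank = float(len(ips_seen)) / py_prefix.num_addresses   # 3 / 256 ≈ 0.0117
weighted = prefix_rank * impact                                # contribution to the aggregates
print(py_prefix.version, prefix_rank, weighted)
```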

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import aiohttp
import logging
from bs4 import BeautifulSoup
from dateutil.parser import parse
from typing import Tuple
from datetime import datetime, date
from pathlib import Path
from .libs.helpers import safe_create_dir, set_running, unset_running
import json


class ShadowServerFetcher():

    def __init__(self, user, password, config_path_modules: Path, storage_directory: Path,
                 loglevel: int=logging.DEBUG) -> None:
        self.__init_logger(loglevel)
        self.storage_directory = storage_directory
        self.config_path_modules = config_path_modules
        self.user = user
        self.password = password
        self.index_page = 'https://dl.shadowserver.org/reports/index.php'
        self.vendor = 'shadowserver'
        self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone',
                                 'microsoft', 'scan', 'sinkhole6', 'sinkhole')
        self.first_available_day = None
        self.last_available_day = None
        self.available_entries = {}

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        self.logger.setLevel(loglevel)

    async def __get_index(self):
        auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
        async with aiohttp.ClientSession() as s:
            self.logger.debug('Fetching the index.')
            async with s.post(self.index_page, data=auth_details) as r:
                return await r.text()

    async def __build_daily_dict(self):
        html_index = await self.__get_index()
        soup = BeautifulSoup(html_index, 'html.parser')
        treeview = soup.find(id='treemenu1')
        for y in treeview.select('> li'):
            year = y.contents[0]
            for m in y.contents[1].select('> li'):
                month = m.contents[0]
                for d in m.contents[1].select('> li'):
                    day = d.contents[0]
                    date = parse(f'{year} {month} {day}').date()
                    self.available_entries[date.isoformat()] = []
                    for a in d.contents[1].find_all('a', href=True):
                        if not self.first_available_day:
                            self.first_available_day = date
                        self.last_available_day = date
                        self.available_entries[date.isoformat()].append((a['href'], a.string))
        self.logger.debug('Dictionary created.')

    def __normalize_day(self, day: Tuple[str, date, datetime]=None) -> str:
        if not day:
            if not self.last_available_day:
                raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
            day = self.last_available_day
        else:
            if isinstance(day, str):
                day = parse(day).date()
            elif isinstance(day, datetime):
                day = day.date()
        return day.isoformat()

    def __split_name(self, name):
        type_content, country, list_type = name.split('-')
        if '_' in type_content:
            type_content, details_type = type_content.split('_', maxsplit=1)
            if '_' in details_type:
                details_type, sub = details_type.split('_')
                return list_type, country, (type_content, details_type, sub)
            return list_type, country, (type_content, details_type)
        return list_type, country, (type_content)

    def __check_config(self, filename: str) -> Path:
        self.logger.debug(f'Working on config for {filename}.')
        config = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
        type_content, _, type_details = self.__split_name(filename)
        prefix = type_content.split('.')[0]
        config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))

        main_type = type_details[0]
        if main_type not in self.known_list_types:
            self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
            return None

        if main_type == 'blacklist':
            config['impact'] = 5
        elif main_type == 'botnet':
            config['impact'] = 2
        elif main_type == 'cc':
            config['impact'] = 5
        elif main_type == 'cisco':
            config['impact'] = 3
        elif main_type == 'cwsandbox':
            config['impact'] = 5
        elif main_type == 'drone':
            config['impact'] = 2
        elif main_type == 'microsoft':
            config['impact'] = 3
        elif main_type == 'scan':
            config['impact'] = 1
        elif main_type == 'sinkhole6':
            config['impact'] = 2
        elif main_type == 'sinkhole':
            config['impact'] = 2
        if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists():
            self.logger.debug(f'Creating config file for {filename}.')
            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f:
                json.dump(config, f, indent=2)
        else:
            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f:
                # Validate new config file with old
                config_current = json.load(f)
                if config_current != config:
                    self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))
        # Init list directory
        directory = self.storage_directory / config['vendor'] / config['name']
        safe_create_dir(directory)
        meta = directory / 'meta'
        safe_create_dir(meta)
        archive_dir = directory / 'archive'
        safe_create_dir(archive_dir)
        self.logger.debug(f'Done with config for {filename}.')
        return directory

    async def download_daily_entries(self, day: Tuple[str, date, datetime]=None):
        set_running(f'{self.__class__.__name__}')
        await self.__build_daily_dict()
        for url, filename in self.available_entries[self.__normalize_day(day)]:
            storage_dir = self.__check_config(filename)
            if not storage_dir:
                continue
            # Check if the file we're trying to download has already been downloaded. Skip if True.
            uuid = url.split('/')[-1]
            if (storage_dir / 'meta' / 'last_download').exists():
                with open(storage_dir / 'meta' / 'last_download') as f:
                    last_download_uuid = f.read()
                if last_download_uuid == uuid:
                    self.logger.debug(f'Already downloaded: {url}.')
                    continue
            async with aiohttp.ClientSession() as s:
                async with s.get(url) as r:
                    self.logger.info(f'Downloading {url}.')
                    content = await r.content.read()
                    with (storage_dir / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
                        f.write(content)
                    with open(storage_dir / 'meta' / 'last_download', 'w') as f:
                        f.write(uuid)
        unset_running(f'{self.__class__.__name__}')
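
The if/elif ladder assigning impact weights per report type boils down to a lookup table; the same mapping as a sketch (weights copied from the code above, the dict form is only an alternative presentation, not what the module actually does):

```python
# Same type-to-impact mapping as the ladder above, as a plain lookup table.
IMPACTS = {
    'blacklist': 5, 'cc': 5, 'cwsandbox': 5,
    'cisco': 3, 'microsoft': 3,
    'botnet': 2, 'drone': 2, 'sinkhole': 2, 'sinkhole6': 2,
    'scan': 1,
}


def impact_for(main_type: str) -> int:
    # Unknown types never reach this point: known_list_types is checked first.
    return IMPACTS[main_type]
```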

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.archive import DeepArchive

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -21,20 +21,28 @@ class ModulesManager(AbstractManager):
        if not config_dir:
            config_dir = get_config_path()
        if not storage_directory:
            storage_directory = get_homedir() / 'rawdata'
        modules_config = config_dir / 'modules'
        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
        self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths]
        self.storage_directory = get_homedir() / 'rawdata'
        self.modules_config = config_dir / 'modules'
        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
        self.modules = [Fetcher(path, self.storage_directory, loglevel) for path in self.modules_paths]

    def _to_run_forever(self):
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.gather(
                *[module.fetch_list() for module in self.modules if module.fetcher],
                return_exceptions=True)
            )
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.logger.critical(f'Exception while fetching lists: {e}')
        # Check if there are new config files
        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
        self.modules += [Fetcher(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
        self.modules_paths += new_modules_paths

        if self.modules:
            loop = asyncio.get_event_loop()
            try:
                loop.run_until_complete(asyncio.gather(
                    *[module.fetch_list() for module in self.modules if module.fetcher],
                    return_exceptions=True)
                )
            except aiohttp.client_exceptions.ClientConnectorError as e:
                self.logger.critical(f'Exception while fetching lists: {e}')
        else:
            self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.')


if __name__ == '__main__':
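
Both the fetcher manager above and the parser manager further down now rescan the modules config directory on every cycle instead of only at startup, which is what lets the dynamically generated shadowserver_*.json configs get picked up without a restart. The pattern, distilled into a standalone sketch (make_worker stands in for Fetcher / RawFilesParser):

```python
# Distilled sketch of the "pick up new module configs on every cycle" pattern.
from pathlib import Path


def refresh_workers(modules_config: Path, known_paths: list, workers: list, make_worker):
    new_paths = [p for p in modules_config.glob('*.json') if p not in known_paths]
    workers += [make_worker(p) for p in new_paths]
    known_paths += new_paths
    return known_paths, workers
```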

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.monitor import Monitor

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -19,13 +19,21 @@ class ParserManager(AbstractManager):
        if not config_dir:
            config_dir = get_config_path()
        if not storage_directory:
            storage_directory = get_homedir() / 'rawdata'
        modules_config = config_dir / 'modules'
        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
        self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths]
        self.storage_directory = get_homedir() / 'rawdata'
        self.modules_config = config_dir / 'modules'
        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
        self.modules = [RawFilesParser(path, self.storage_directory, loglevel) for path in self.modules_paths]

    def _to_run_forever(self):
        [module.parse_raw_files() for module in self.modules]
        # Check if there are new config files
        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
        self.modules += [RawFilesParser(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
        self.modules_paths += new_modules_paths

        if self.modules:
            [module.parse_raw_files() for module in self.modules]
        else:
            self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.')


if __name__ == '__main__':

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.libs.helpers import get_homedir, check_running

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from bgpranking.libs.helpers import is_running, get_socket_path

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import json
import asyncio
from pathlib import Path
import aiohttp

from bgpranking.abstractmanager import AbstractManager
from bgpranking.shadowserverfetcher import ShadowServerFetcher
from bgpranking.libs.helpers import get_config_path, get_homedir

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO, datefmt='%I:%M:%S')


class ShadowServerManager(AbstractManager):

    def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
        super().__init__(loglevel)
        self.config = True
        if not config_dir:
            config_dir = get_config_path()
        if not (config_dir / 'shadowserver.json').exists():
            self.config = False
            self.logger.warning(f'No config file available, the shadow server module will not be launched.')
            return
        with open(config_dir / 'shadowserver.json') as f:
            ss_config = json.load(f)
        if not storage_directory:
            storage_directory = get_homedir() / 'rawdata'
        modules_config = config_dir / 'modules'
        self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], modules_config, storage_directory, loglevel)

    def _to_run_forever(self):
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(self.fetcher.download_daily_entries())
        except aiohttp.client_exceptions.ClientConnectorError as e:
            self.logger.critical(f'Exception while fetching Shadow Server lists: {e}')


if __name__ == '__main__':
    modules_manager = ShadowServerManager()
    if modules_manager.config:
        modules_manager.run(sleep_in_sec=3600)

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from subprocess import Popen

@@ -13,6 +13,7 @@ if __name__ == '__main__':
    Popen(['loadprefixes.py'])
    Popen(['rislookup.py'])
    Popen(['fetcher.py'])
    Popen(['ssfetcher.py'])
    Popen(['parser.py'])
    Popen(['sanitizer.py'])
    Popen(['dbinsert.py'])

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from subprocess import Popen

setup.py

@@ -13,7 +13,7 @@ setup(
    description='BGP Ranking, the new one..',
    packages=['bgpranking'],
    scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py',
             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py',
             'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
    classifiers=[
        'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',