new: Add Shadowserver module

parent faa3c634f2
commit 6045635b72
@@ -110,3 +110,11 @@ rawdata
 storage/ardb.pid
 storage/data
 storage/repl
+
+# Config file for shadowserver, with password
+bgpranking/config/shadowserver.json
+# The shadowserver config files are dynamically generated
+bgpranking/config/modules/shadowserver_*.json
+
+# Do not store the d3 lib in the repo
+website/web/static/d3*.js
@@ -10,6 +10,7 @@ from .libs.helpers import long_sleep, shutdown_requested
 class AbstractManager(ABC):
 
     def __init__(self, loglevel: int=logging.DEBUG):
+        self.loglevel = loglevel
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
         self.logger.info(f'Initializing {self.__class__.__name__}')
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+set -e
+set -x
+
+find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete
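The new helper script above removes every .json file at the top level of the directory it runs in, except the shadowserver*.json ones (case-insensitive match). A rough Python equivalent, shown only to illustrate what the find invocation does:

    # Rough Python equivalent of the find one-liner above (illustrative only);
    # assumes it runs from the same directory the script targets.
    from pathlib import Path

    for entry in Path('.').iterdir():
        if entry.is_file() and entry.suffix == '.json' and not entry.name.lower().startswith('shadowserver'):
            entry.unlink()  # drop every top-level .json except shadowserver*.json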
@@ -0,0 +1,4 @@
+{
+  "user": "[USERNAME]",
+  "password": "[PASSWORD]"
+}
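This template only stores the Shadowserver portal credentials; bin/ssfetcher.py (added further down in this commit) loads it and hands the values to ShadowServerFetcher. A minimal sketch of that flow, assuming the file lives at bgpranking/config/shadowserver.json as suggested by the .gitignore entry above:

    # Minimal sketch of how the credentials file is consumed (see ssfetcher.py below);
    # the path is an assumption based on the .gitignore entry added in this commit.
    import json
    from pathlib import Path

    with open(Path('bgpranking') / 'config' / 'shadowserver.json') as f:
        ss_config = json.load(f)
    user, password = ss_config['user'], ss_config['password']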
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import aiohttp
@@ -112,10 +112,10 @@ class Fetcher():
             self.logger.debug('New list, no historical files')
             # nothing has been downloaded ever, moving on
             return False
+        dl_hash = sha512(downloaded)
         for last_file in to_check:
             with last_file.open('rb') as f:
                 last_hash = sha512(f.read())
-            dl_hash = sha512(downloaded)
             if (dl_hash.digest() == last_hash.digest() and
                     parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                 self.logger.debug('Same file already downloaded today.')
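The hunk above hoists the SHA-512 of the freshly downloaded payload out of the comparison loop so it is computed once instead of once per archived file. A standalone sketch of the same dedup idea (the function name and directory layout are illustrative, not the module's API):

    # Standalone sketch of the hash-based duplicate check: skip the new payload if an
    # identical file was already archived today. Names here are illustrative only.
    from datetime import date
    from hashlib import sha512
    from pathlib import Path

    from dateutil import parser

    def already_downloaded_today(downloaded: bytes, archive_dir: Path) -> bool:
        dl_hash = sha512(downloaded)  # hash the new payload once, outside the loop
        for last_file in archive_dir.glob('*.txt'):
            last_hash = sha512(last_file.read_bytes())
            if (dl_hash.digest() == last_hash.digest()
                    and parser.parse(last_file.name.split('.')[0]).date() == date.today()):
                return True
        return False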
@@ -11,7 +11,7 @@ from uuid import uuid4
 from io import BytesIO
 import importlib
 
-from typing import List
+from typing import List, Union, Tuple
 import types
 
 from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path
@@ -20,7 +20,7 @@ from .libs.helpers import safe_create_dir, set_running, unset_running, get_socket_path
 class RawFilesParser():
 
     def __init__(self, config_file: Path, storage_directory: Path,
-                 loglevel: int=logging.DEBUG):
+                 loglevel: int=logging.DEBUG) -> None:
         with open(config_file, 'r') as f:
             module_parameters = json.load(f)
         self.vendor = module_parameters['vendor']
@@ -36,7 +36,7 @@ class RawFilesParser():
         self.redis_intake = StrictRedis(unix_socket_path=get_socket_path('intake'), db=0)
         self.logger.debug(f'Starting intake on {self.source}')
 
-    def __init_logger(self, loglevel):
+    def __init_logger(self, loglevel) -> None:
         self.logger = logging.getLogger(f'{self.__class__.__name__}-{self.vendor}-{self.listname}')
         self.logger.setLevel(loglevel)
 
@@ -52,12 +52,12 @@ class RawFilesParser():
         Only run it when needed, it is nasty and slow'''
         return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]
 
-    def parse_raw_file(self, f: BytesIO):
+    def parse_raw_file(self, f: BytesIO) -> List[bytes]:
         # If the list doesn't provide a time, fallback to current day, midnight
         self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
         return self.extract_ipv4(f.getvalue())
 
-    def parse_raw_files(self):
+    def parse_raw_files(self) -> None:
         set_running(f'{self.__class__.__name__}-{self.source}')
         nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()])
         if nb_unparsable_files:
@@ -69,9 +69,13 @@ class RawFilesParser():
                     to_parse = BytesIO(f.read())
                 p = self.redis_intake.pipeline()
                 for ip in self.parse_raw_file(to_parse):
+                    if isinstance(ip, tuple):
+                        ip, datetime = ip
+                    else:
+                        datetime = self.datetime
                     uuid = uuid4()
                     p.hmset(uuid, {'ip': ip, 'source': self.source,
-                                   'datetime': self.datetime.isoformat()})
+                                   'datetime': datetime.isoformat()})
                     p.sadd('intake', uuid)
                 p.execute()
                 self._archive(filepath)
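With this hunk a vendor parser may yield either bare IPs (stamped with the list-level self.datetime) or (ip, timestamp) tuples for feeds that carry a timestamp per row, such as the Shadowserver CSVs added later in this commit. A self-contained sketch of the dispatch, with made-up values:

    # Sketch of the per-entry timestamp handling above; the entries and the default
    # timestamp are invented for illustration.
    from datetime import datetime

    default_dt = datetime(2018, 4, 10)  # stands in for self.datetime
    entries = ['198.51.100.23', ('203.0.113.7', datetime(2018, 4, 9, 23, 55))]

    for entry in entries:
        if isinstance(entry, tuple):
            ip, when = entry              # the feed provided its own timestamp
        else:
            ip, when = entry, default_dt  # fall back to the list-level timestamp
        print(ip, when.isoformat())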
@@ -81,10 +85,10 @@ class RawFilesParser():
         finally:
             unset_running(f'{self.__class__.__name__}-{self.source}')
 
-    def _archive(self, filepath: Path):
+    def _archive(self, filepath: Path) -> None:
         '''After processing, move file to the archive directory'''
         filepath.rename(self.directory / 'archive' / filepath.name)
 
-    def _unparsable(self, filepath: Path):
+    def _unparsable(self, filepath: Path) -> None:
         '''After processing, move file to the archive directory'''
         filepath.rename(self.unparsable_dir / filepath.name)
@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
 
+from typing import List
+
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# Generated on (.*)#\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
 
+from typing import List
+
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'## Feed generated at: (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -1,14 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from io import BytesIO
-from datetime import datetime
-
-from ..simple_feed_fetcher import RawFileImporter
-
-
-class DefaultImporter(RawFileImporter):
-
-    def parse_raw_file(self, f: BytesIO):
-        self.datetime = datetime.now()
-        return self.extract_ipv4(f.getvalue())
@@ -4,9 +4,10 @@
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
 
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0])
     iplist = self.extract_ipv4(f.getvalue())
     # The IPs have leading 0s. Getting rid of them directly here.
@@ -1,11 +1,13 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
 
+from typing import List
+
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -1,11 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from dateutil.parser import parse
 import re
 from io import BytesIO
+from typing import List
 
 
-def parse_raw_file(self, f: BytesIO):
+def parse_raw_file(self, f: BytesIO) -> List[bytes]:
     self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0])
     return self.extract_ipv4(f.getvalue())
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from dateutil.parser import parse
+from csv import DictReader
+from io import BytesIO, StringIO
+from typing import Tuple, Generator
+from datetime import datetime
+
+
+def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]:
+    default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+    reader = DictReader(StringIO(f.getvalue().decode()))
+    for row in reader:
+        if 'timestamp' in row:
+            ts = parse(row['timestamp'])
+        else:
+            ts = default_ts
+
+        if 'ip' in row:
+            ip = row['ip']
+        elif 'src_ip' in row:
+            # For sinkhole6_http
+            ip = row['src_ip']
+        else:
+            self.logger.critical(f'No IPs in the list {self.source}.')
+            break
+        yield ip, ts
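The new Shadowserver parser treats the report as CSV and yields (ip, timestamp) pairs, falling back to src_ip for the sinkhole6_http feed. A quick usage sketch with an invented two-row report; the stub object only supplies the source and logger attributes the function expects, and parse_raw_file is assumed to be imported from the parser module above:

    # Usage sketch for the generator above; the CSV payload and stub are invented.
    import logging
    from io import BytesIO

    class _Stub:
        source = 'shadowserver-demo'
        logger = logging.getLogger('demo')

    sample = (b'timestamp,ip,port\n'
              b'2018-04-10 00:04:35,198.51.100.23,23\n'
              b'2018-04-10 00:09:12,203.0.113.7,2323\n')

    for ip, ts in parse_raw_file(_Stub(), BytesIO(sample)):
        print(ip, ts.isoformat())
    # 198.51.100.23 2018-04-10T00:04:35
    # 203.0.113.7 2018-04-10T00:09:12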
@@ -4,7 +4,7 @@
 import logging
 from redis import StrictRedis
 from .libs.helpers import set_running, unset_running, get_socket_path, load_config_files
-from datetime import date
+from datetime import datetime, date, timedelta
 from ipaddress import ip_network
 from pathlib import Path
 
@@ -22,40 +22,34 @@ class Ranking():
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
 
-    def compute(self):
-        self.logger.info('Start ranking')
-        set_running(self.__class__.__name__)
-        today = date.today().isoformat()
+    def rank_a_day(self, day: str):
+        # FIXME: If we want to rank an older date, we need to have older datasets for the announces
         v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
-        if not v4_last or not v6_last:
-            '''Failsafe if asn_meta has not been populated yet'''
-            unset_running(self.__class__.__name__)
-            return
-        asns_aggregation_key_v4 = f'{today}|asns|v4'
-        asns_aggregation_key_v6 = f'{today}|asns|v6'
+        asns_aggregation_key_v4 = f'{day}|asns|v4'
+        asns_aggregation_key_v6 = f'{day}|asns|v6'
         to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6])
         r_pipeline = self.ranking.pipeline()
-        for source in self.storage.smembers(f'{today}|sources'):
-            self.logger.info(f'{today} - Ranking source: {source}')
-            source_aggregation_key_v4 = f'{today}|{source}|asns|v4'
-            source_aggregation_key_v6 = f'{today}|{source}|asns|v6'
+        for source in self.storage.smembers(f'{day}|sources'):
+            self.logger.info(f'{day} - Ranking source: {source}')
+            source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
+            source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
             to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
-            for asn in self.storage.smembers(f'{today}|{source}'):
-                prefixes_aggregation_key_v4 = f'{today}|{asn}|v4'
-                prefixes_aggregation_key_v6 = f'{today}|{asn}|v6'
+            for asn in self.storage.smembers(f'{day}|{source}'):
+                prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
+                prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
                 to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
                 if asn == '0':
                     # Default ASN when no matches. Probably spoofed.
                     continue
-                self.logger.debug(f'{today} - Ranking source: {source} / ASN: {asn}')
+                self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
                 asn_rank_v4 = 0.0
                 asn_rank_v6 = 0.0
-                for prefix in self.storage.smembers(f'{today}|{source}|{asn}'):
+                for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
                     ips = set([ip_ts.split('|')[0]
-                               for ip_ts in self.storage.smembers(f'{today}|{source}|{asn}|{prefix}')])
+                               for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')])
                     py_prefix = ip_network(prefix)
                     prefix_rank = float(len(ips)) / py_prefix.num_addresses
-                    r_pipeline.zadd(f'{today}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
+                    r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', prefix_rank, prefix)
                     if py_prefix.version == 4:
                         asn_rank_v4 += len(ips) * self.config_files[source]['impact']
                         r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix, prefix_rank * self.config_files[source]['impact'])
@@ -66,17 +60,32 @@ class Ranking():
                 if v4count:
                     asn_rank_v4 /= float(v4count)
                     if asn_rank_v4:
-                        r_pipeline.set(f'{today}|{source}|{asn}|v4', asn_rank_v4)
+                        r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
                         r_pipeline.zincrby(asns_aggregation_key_v4, asn, asn_rank_v4)
                         r_pipeline.zadd(source_aggregation_key_v4, asn_rank_v4, asn)
                 if v6count:
                     asn_rank_v6 /= float(v6count)
                     if asn_rank_v6:
-                        r_pipeline.set(f'{today}|{source}|{asn}|v6', asn_rank_v6)
+                        r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
                         r_pipeline.zincrby(asns_aggregation_key_v6, asn, asn_rank_v6)
                         r_pipeline.zadd(source_aggregation_key_v6, asn_rank_v4, asn)
         self.ranking.delete(*to_delete)
         r_pipeline.execute()
 
+    def compute(self):
+        self.logger.info('Start ranking')
+        set_running(self.__class__.__name__)
+        v4_last, v6_last = self.asn_meta.mget('v4|last', 'v6|last')
+        if not v4_last or not v6_last:
+            '''Failsafe if asn_meta has not been populated yet'''
+            unset_running(self.__class__.__name__)
+            return
+        today = date.today()
+        now = datetime.now()
+        today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
+        if now < today12am:
+            # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
+            self.rank_a_day((today - timedelta(days=1)).isoformat())
+        self.rank_a_day(today.isoformat())
         unset_running(self.__class__.__name__)
         self.logger.info('Ranking done.')
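To make the arithmetic concrete: a prefix's rank is the number of distinct reported IPs divided by the prefix size, and the ASN rank accumulates reported-IP counts weighted by the source's impact before being divided by v4count / v6count, which are set outside the hunks shown here. A worked example with invented numbers, under the assumption that v4count is the ASN's announced IPv4 address count:

    # Worked example of the ranking formulas above; all numbers are invented and
    # v4count is assumed to be the ASN's announced IPv4 address count.
    from ipaddress import ip_network

    impact = 5                                      # e.g. a blacklist-type source
    prefix = ip_network('203.0.113.0/24')
    ips = {'203.0.113.1', '203.0.113.7', '203.0.113.9'}

    prefix_rank = len(ips) / prefix.num_addresses   # 3 / 256 ~= 0.0117
    asn_rank_v4 = len(ips) * impact                 # 15.0, accumulated over prefixes
    v4count = 512                                   # assumed announced IPv4 space
    asn_rank_v4 /= float(v4count)                   # ~= 0.0293
    print(round(prefix_rank, 4), round(asn_rank_v4, 4))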
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import aiohttp
+import logging
+from bs4 import BeautifulSoup
+from dateutil.parser import parse
+from typing import Tuple
+from datetime import datetime, date
+from pathlib import Path
+from .libs.helpers import safe_create_dir, set_running, unset_running
+import json
+
+
+class ShadowServerFetcher():
+
+    def __init__(self, user, password, config_path_modules: Path, storage_directory: Path,
+                 loglevel: int=logging.DEBUG) -> None:
+        self.__init_logger(loglevel)
+        self.storage_directory = storage_directory
+        self.config_path_modules = config_path_modules
+        self.user = user
+        self.password = password
+        self.index_page = 'https://dl.shadowserver.org/reports/index.php'
+        self.vendor = 'shadowserver'
+        self.known_list_types = ('blacklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 'drone',
+                                 'microsoft', 'scan', 'sinkhole6', 'sinkhole')
+        self.first_available_day = None
+        self.last_available_day = None
+        self.available_entries = {}
+
+    def __init_logger(self, loglevel):
+        self.logger = logging.getLogger(f'{self.__class__.__name__}')
+        self.logger.setLevel(loglevel)
+
+    async def __get_index(self):
+        auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
+        async with aiohttp.ClientSession() as s:
+            self.logger.debug('Fetching the index.')
+            async with s.post(self.index_page, data=auth_details) as r:
+                return await r.text()
+
+    async def __build_daily_dict(self):
+        html_index = await self.__get_index()
+        soup = BeautifulSoup(html_index, 'html.parser')
+        treeview = soup.find(id='treemenu1')
+        for y in treeview.select('> li'):
+            year = y.contents[0]
+            for m in y.contents[1].select('> li'):
+                month = m.contents[0]
+                for d in m.contents[1].select('> li'):
+                    day = d.contents[0]
+                    date = parse(f'{year} {month} {day}').date()
+                    self.available_entries[date.isoformat()] = []
+                    for a in d.contents[1].find_all('a', href=True):
+                        if not self.first_available_day:
+                            self.first_available_day = date
+                        self.last_available_day = date
+                        self.available_entries[date.isoformat()].append((a['href'], a.string))
+        self.logger.debug('Dictionary created.')
+
+    def __normalize_day(self, day: Tuple[str, date, datetime]=None) -> str:
+        if not day:
+            if not self.last_available_day:
+                raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
+            day = self.last_available_day
+        else:
+            if isinstance(day, str):
+                day = parse(day).date()
+            elif isinstance(day, datetime):
+                day = day.date()
+        return day.isoformat()
+
+    def __split_name(self, name):
+        type_content, country, list_type = name.split('-')
+        if '_' in type_content:
+            type_content, details_type = type_content.split('_', maxsplit=1)
+            if '_' in details_type:
+                details_type, sub = details_type.split('_')
+                return list_type, country, (type_content, details_type, sub)
+            return list_type, country, (type_content, details_type)
+        return list_type, country, (type_content)
+
+    def __check_config(self, filename: str) -> Path:
+        self.logger.debug(f'Working on config for {filename}.')
+        config = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
+        type_content, _, type_details = self.__split_name(filename)
+        prefix = type_content.split('.')[0]
+        config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))
+
+        main_type = type_details[0]
+        if main_type not in self.known_list_types:
+            self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
+            return None
+
+        if main_type == 'blacklist':
+            config['impact'] = 5
+        elif main_type == 'botnet':
+            config['impact'] = 2
+        elif main_type == 'cc':
+            config['impact'] = 5
+        elif main_type == 'cisco':
+            config['impact'] = 3
+        elif main_type == 'cwsandbox':
+            config['impact'] = 5
+        elif main_type == 'drone':
+            config['impact'] = 2
+        elif main_type == 'microsoft':
+            config['impact'] = 3
+        elif main_type == 'scan':
+            config['impact'] = 1
+        elif main_type == 'sinkhole6':
+            config['impact'] = 2
+        elif main_type == 'sinkhole':
+            config['impact'] = 2
+        if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists():
+            self.logger.debug(f'Creating config file for {filename}.')
+            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f:
+                json.dump(config, f, indent=2)
+        else:
+            with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f:
+                # Validate new config file with old
+                config_current = json.load(f)
+                if config_current != config:
+                    self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))
+        # Init list directory
+        directory = self.storage_directory / config['vendor'] / config['name']
+        safe_create_dir(directory)
+        meta = directory / 'meta'
+        safe_create_dir(meta)
+        archive_dir = directory / 'archive'
+        safe_create_dir(archive_dir)
+        self.logger.debug(f'Done with config for {filename}.')
+        return directory
+
+    async def download_daily_entries(self, day: Tuple[str, date, datetime]=None):
+        set_running(f'{self.__class__.__name__}')
+        await self.__build_daily_dict()
+        for url, filename in self.available_entries[self.__normalize_day(day)]:
+            storage_dir = self.__check_config(filename)
+            if not storage_dir:
+                continue
+            # Check if the file we're trying to download has already been downloaded. Skip if True.
+            uuid = url.split('/')[-1]
+            if (storage_dir / 'meta' / 'last_download').exists():
+                with open(storage_dir / 'meta' / 'last_download') as f:
+                    last_download_uuid = f.read()
+                if last_download_uuid == uuid:
+                    self.logger.debug(f'Already downloaded: {url}.')
+                    continue
+            async with aiohttp.ClientSession() as s:
+                async with s.get(url) as r:
+                    self.logger.info(f'Downloading {url}.')
+                    content = await r.content.read()
+            with (storage_dir / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
+                f.write(content)
+            with open(storage_dir / 'meta' / 'last_download', 'w') as f:
+                f.write(uuid)
+        unset_running(f'{self.__class__.__name__}')
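__check_config above derives one module config per Shadowserver report name and drops it into the modules config directory when it is missing, which is what the dynamically generated shadowserver_*.json entries in .gitignore refer to. Based on that code, a generated file would look roughly like this; the name value depends on the actual report filename and is only a placeholder:

    # Illustrative shape of a generated shadowserver_<name>.json module config,
    # following __check_config above; 'name' is a placeholder, not a real report name.
    import json

    config = {
        'vendor': 'shadowserver',
        'parser': '.parsers.shadowserver',
        'name': '<prefix>-<type_details_joined_by_underscores>',
        'impact': 1,  # e.g. a 'scan' report; see the impact table above
    }
    print(json.dumps(config, indent=2))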
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.archive import DeepArchive
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -21,12 +21,18 @@ class ModulesManager(AbstractManager):
         if not config_dir:
             config_dir = get_config_path()
         if not storage_directory:
-            storage_directory = get_homedir() / 'rawdata'
-        modules_config = config_dir / 'modules'
-        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
-        self.modules = [Fetcher(path, storage_directory, loglevel) for path in modules_paths]
+            self.storage_directory = get_homedir() / 'rawdata'
+        self.modules_config = config_dir / 'modules'
+        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
+        self.modules = [Fetcher(path, self.storage_directory, loglevel) for path in self.modules_paths]
 
     def _to_run_forever(self):
+        # Check if there are new config files
+        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
+        self.modules += [Fetcher(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
+        self.modules_paths += new_modules_paths
+
+        if self.modules:
             loop = asyncio.get_event_loop()
             try:
                 loop.run_until_complete(asyncio.gather(
@@ -35,6 +41,8 @@ class ModulesManager(AbstractManager):
                 )
             except aiohttp.client_exceptions.ClientConnectorError as e:
                 self.logger.critical(f'Exception while fetching lists: {e}')
+        else:
+            self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.')
 
 
 if __name__ == '__main__':
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.monitor import Monitor
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -19,13 +19,21 @@ class ParserManager(AbstractManager):
         if not config_dir:
             config_dir = get_config_path()
         if not storage_directory:
-            storage_directory = get_homedir() / 'rawdata'
-        modules_config = config_dir / 'modules'
-        modules_paths = [modulepath for modulepath in modules_config.glob('*.json')]
-        self.modules = [RawFilesParser(path, storage_directory, loglevel) for path in modules_paths]
+            self.storage_directory = get_homedir() / 'rawdata'
+        self.modules_config = config_dir / 'modules'
+        self.modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json')]
+        self.modules = [RawFilesParser(path, self.storage_directory, loglevel) for path in self.modules_paths]
 
     def _to_run_forever(self):
+        # Check if there are new config files
+        new_modules_paths = [modulepath for modulepath in self.modules_config.glob('*.json') if modulepath not in self.modules_paths]
+        self.modules += [RawFilesParser(path, self.storage_directory, self.loglevel) for path in new_modules_paths]
+        self.modules_paths += new_modules_paths
+
+        if self.modules:
             [module.parse_raw_files() for module in self.modules]
+        else:
+            self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.')
 
 
 if __name__ == '__main__':
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.libs.helpers import get_homedir, check_running
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import logging
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from bgpranking.libs.helpers import is_running, get_socket_path
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import logging
+import json
+import asyncio
+from pathlib import Path
+import aiohttp
+
+from bgpranking.abstractmanager import AbstractManager
+from bgpranking.shadowserverfetcher import ShadowServerFetcher
+from bgpranking.libs.helpers import get_config_path, get_homedir
+
+logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
+                    level=logging.INFO, datefmt='%I:%M:%S')
+
+
+class ShadowServerManager(AbstractManager):
+
+    def __init__(self, config_dir: Path=None, storage_directory: Path=None, loglevel: int=logging.DEBUG):
+        super().__init__(loglevel)
+        self.config = True
+        if not config_dir:
+            config_dir = get_config_path()
+        if not (config_dir / 'shadowserver.json').exists():
+            self.config = False
+            self.logger.warning(f'No config file available, the shadow server module will not be launched.')
+            return
+        with open(config_dir / 'shadowserver.json') as f:
+            ss_config = json.load(f)
+        if not storage_directory:
+            storage_directory = get_homedir() / 'rawdata'
+        modules_config = config_dir / 'modules'
+        self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], modules_config, storage_directory, loglevel)
+
+    def _to_run_forever(self):
+        loop = asyncio.get_event_loop()
+        try:
+            loop.run_until_complete(self.fetcher.download_daily_entries())
+        except aiohttp.client_exceptions.ClientConnectorError as e:
+            self.logger.critical(f'Exception while fetching Shadow Server lists: {e}')
+
+
+if __name__ == '__main__':
+    modules_manager = ShadowServerManager()
+    if modules_manager.config:
+        modules_manager.run(sleep_in_sec=3600)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from subprocess import Popen
@@ -13,6 +13,7 @@ if __name__ == '__main__':
     Popen(['loadprefixes.py'])
     Popen(['rislookup.py'])
    Popen(['fetcher.py'])
+    Popen(['ssfetcher.py'])
     Popen(['parser.py'])
     Popen(['sanitizer.py'])
     Popen(['dbinsert.py'])
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 from subprocess import Popen
setup.py (2 changes)
@@ -13,7 +13,7 @@ setup(
     description='BGP Ranking, the new one..',
     packages=['bgpranking'],
     scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
-             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py',
+             'bin/loadprefixes.py', 'bin/rislookup.py', 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py',
              'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
     classifiers=[
         'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',