#!/usr/bin/env python3
# -*- coding: utf-8 -*-
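
"""Fetch the daily reports from the Shadow Server website and create the
matching module config files and storage directories as needed."""
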
import logging
from logging import Logger
import json
import asyncio
from typing import Tuple, Dict, List, Optional, TypeVar, Any
from datetime import datetime, date
from pathlib import Path

import aiohttp
from bs4 import BeautifulSoup  # type: ignore
from dateutil.parser import parse

from bgpranking.default import AbstractManager, get_homedir, safe_create_dir
from bgpranking.helpers import get_data_dir, get_modules_dir

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO)

Dates = TypeVar('Dates', datetime, date, str)


class ShadowServerFetcher():
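    """Fetch the daily reports from dl.shadowserver.org and store them on disk."""
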
    def __init__(self, user, password, logger: Logger) -> None:
        self.logger = logger
        self.storage_directory = get_data_dir()
        self.config_path_modules = get_modules_dir()
        self.user = user
        self.password = password
        self.index_page = 'https://dl.shadowserver.org/reports/index.php'
        self.vendor = 'shadowserver'
        self.known_list_types = ('blacklist', 'blocklist', 'botnet', 'cc', 'cisco', 'cwsandbox',
                                 'device', 'drone', 'event4', 'malware', 'scan6', 'event6', 'netis',
                                 'microsoft', 'scan', 'sinkhole6', 'sinkhole', 'outdated',
                                 'compromised', 'hp', 'darknet', 'ddos')
        self.first_available_day: Optional[date] = None
        # Initialized to None so __normalize_day can detect that the daily
        # dictionary has not been built yet (a bare annotation would raise
        # AttributeError on first access).
        self.last_available_day: Optional[date] = None
        self.available_entries: Dict[str, List[Tuple[str, str]]] = {}

    async def __get_index(self):
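        """Log into the Shadow Server reports site and return the index page HTML."""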
        auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
        async with aiohttp.ClientSession() as s:
            self.logger.debug('Fetching the index.')
            async with s.post(self.index_page, data=auth_details) as r:
                return await r.text()

    async def __build_daily_dict(self):
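        """Parse the year/month/day tree of the index page and map each ISO
        day to the list of (url, filename) entries available for that day."""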
        html_index = await self.__get_index()
        soup = BeautifulSoup(html_index, 'html.parser')
        treeview = soup.find(id='treemenu1')
        for y in treeview.select(':scope > li'):
            year = y.contents[0]
            for m in y.contents[1].select(':scope > li'):
                month = m.contents[0]
                for d in m.contents[1].select(':scope > li'):
                    day = d.contents[0]
                    date = parse(f'{year} {month} {day}').date()
                    self.available_entries[date.isoformat()] = []
                    for a in d.contents[1].find_all('a', href=True):
                        if not self.first_available_day:
                            self.first_available_day = date
                        self.last_available_day = date
                        self.available_entries[date.isoformat()].append((a['href'], a.string))
        self.logger.debug('Dictionary created.')

    def __normalize_day(self, day: Optional[Dates] = None) -> str:
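        """Normalize any supported day representation to an ISO formatted
        string. Defaults to the last available day."""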
        if not day:
            if not self.last_available_day:
                raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first.')
            to_return = self.last_available_day
        elif isinstance(day, str):
            to_return = parse(day).date()
        elif isinstance(day, datetime):
            to_return = day.date()
        else:
            # day is already a plain date
            to_return = day
        return to_return.isoformat()

    def __split_name(self, name):
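        """Split a report filename into its list type, country and content
        type (the content type may carry up to two extra detail components)."""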
        type_content, country, list_type = name.split('-')
        if '_' in type_content:
            type_content, details_type = type_content.split('_', maxsplit=1)
            if '_' in details_type:
                details_type, sub = details_type.split('_', maxsplit=1)
                return list_type, country, (type_content, details_type, sub)
            return list_type, country, (type_content, details_type)
        # Note: a bare string here, not a one-element tuple; __check_config relies on this.
        return list_type, country, type_content

    def __check_config(self, filename: str) -> Optional[Path]:
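        """Create the config file for a report if it does not exist yet,
        validate it otherwise, and ensure the storage directories exist.
        Returns the storage directory, or None for unknown report types."""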
        self.logger.debug(f'Working on config for {filename}.')
        config: Dict[str, Any] = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
        type_content, _, type_details = self.__split_name(filename)
        prefix = type_content.split('.')[0]

        if isinstance(type_details, str):
            main_type = type_details
            config['name'] = '{}-{}'.format(prefix, type_details)
        else:
            main_type = type_details[0]
            config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))

        if main_type not in self.known_list_types:
            self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
            return None

        # Impact score by report type; known types not listed here default to 1.
        impacts = {'blacklist': 5, 'blocklist': 5, 'cc': 5, 'cwsandbox': 5,
                   'cisco': 3, 'microsoft': 3,
                   'botnet': 2, 'malware': 2, 'drone': 2, 'sinkhole': 2,
                   'sinkhole6': 2, 'event4': 2, 'event6': 2, 'netis': 2,
                   'scan': 1, 'scan6': 1, 'device': 1}
        config['impact'] = impacts.get(main_type, 1)

        config_file = self.config_path_modules / f"{config['vendor']}_{config['name']}.json"
        if not config_file.exists():
            self.logger.debug(f'Creating config file for {filename}.')
            with config_file.open('w') as f:
                json.dump(config, f, indent=2)
        else:
            # Validate the generated config against the one already on disk.
            with config_file.open('r') as f:
                config_current = json.load(f)
            if config_current != config:
                self.logger.warning('The config file created by this script is different from the one on disk:\n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))

        # Initialize the list directory layout.
        directory = self.storage_directory / config['vendor'] / config['name']
        safe_create_dir(directory)
        meta = directory / 'meta'
        safe_create_dir(meta)
        archive_dir = directory / 'archive'
        safe_create_dir(archive_dir)
        self.logger.debug(f'Done with config for {filename}.')
        return directory

    async def download_daily_entries(self, day: Optional[Dates] = None):
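        """Download all the reports of a given day (defaults to the last
        available day), skipping the ones already downloaded."""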
        await self.__build_daily_dict()
        for url, filename in self.available_entries[self.__normalize_day(day)]:
            storage_dir = self.__check_config(filename)
            if not storage_dir:
                continue
            # Skip the download if this file has already been fetched.
            uuid = url.split('/')[-1]
            if (storage_dir / 'meta' / 'last_download').exists():
                with open(storage_dir / 'meta' / 'last_download') as _fr:
                    last_download_uuid = _fr.read()
                if last_download_uuid == uuid:
                    self.logger.debug(f'Already downloaded: {url}.')
                    continue
            async with aiohttp.ClientSession() as s:
                async with s.get(url) as r:
                    self.logger.info(f'Downloading {url}.')
                    content = await r.content.read()
                    with (storage_dir / f'{datetime.now().isoformat()}.txt').open('wb') as _fw:
                        _fw.write(content)
                    with (storage_dir / 'meta' / 'last_download').open('w') as _fwt:
                        _fwt.write(uuid)


class ShadowServerManager(AbstractManager):
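    """Long-running manager that periodically triggers the fetcher."""
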
    def __init__(self, loglevel: int = logging.INFO):
        super().__init__(loglevel)
        self.script_name = 'shadowserver_fetcher'
        shadow_server_config_file = get_homedir() / 'config' / 'shadowserver.json'
        self.config = True
        if not shadow_server_config_file.exists():
            self.config = False
            self.logger.warning(f'No config file available at {shadow_server_config_file}; the Shadow Server module will not be launched.')
            return
        with shadow_server_config_file.open() as f:
            ss_config = json.load(f)
        self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], self.logger)

    async def _to_run_forever_async(self):
        await self.fetcher.download_daily_entries()


def main():
    modules_manager = ShadowServerManager()
    if modules_manager.config:
        asyncio.run(modules_manager.run_async(sleep_in_sec=3600))


if __name__ == '__main__':
    main()