fix: Properly use asyncio all over the place \o/

pull/12/head
Raphaël Vinot 2018-03-20 21:44:46 +01:00
parent 7961b68852
commit b3c68db8ec
9 changed files with 218 additions and 57 deletions


@@ -71,6 +71,24 @@ Creates the following hashes:
 IP = {'asn': <asn>, 'prefix': <prefix>, 'description': <description>}
 ```
+
+## Ranking Information cache (redis, port 6382)
+
+*Usage*: Store the list of ASNs currently known at RIPE, and the prefixes originating from them.
+
+Creates the following sets:
+```python
+asns = set([<asn>, ...])
+<asn>|v4 = set([<ipv4_prefix>, ...])
+<asn>|v6 = set([<ipv6_prefix>, ...])
+```
+And the following keys:
+```python
+<asn>|v4|ipcount = <Total number of IPv4 addresses originated by this AS>
+<asn>|v6|ipcount = <Total number of IPv6 addresses originated by this AS>
+```
+
 ## Long term storage (ardb, port 16379)
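As a sanity check of the layout above, a consumer can read these structures back with plain redis-py. A minimal sketch, assuming a local instance on port 6382 already populated by the lookup code below:

```python
from redis import Redis

# Ranking Information cache documented above (redis, port 6382).
cache = Redis(host='localhost', port=6382, db=0, decode_responses=True)

for asn in list(cache.smembers('asns'))[:5]:  # sample a few known ASNs
    v4_prefixes = cache.smembers('{}|v4'.format(asn))
    v4_ipcount = cache.get('{}|v4|ipcount'.format(asn))
    print('AS{}: {} IPv4 prefixes, {} addresses'.format(asn, len(v4_prefixes), v4_ipcount))
```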

listimport/initranking.py Normal file, 76 lines

@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import json
import asyncio

from redis import Redis
from telnetlib import Telnet
from ipaddress import ip_network

from .libs.StatsRipeText import RIPECaching


class ASNLookup(RIPECaching):

    def __init__(self, sourceapp: str='bgpranking-ng', loglevel: int=logging.DEBUG):
        super().__init__(sourceapp, loglevel)
        self.redis_cache = Redis(host='localhost', port=6382, db=0, decode_responses=True)
        self.logger.debug('Starting ASN lookup cache')

    def get_all_asns(self):
        with Telnet(self.hostname, self.port) as tn:
            tn.write(b'-k\n')  # keepalive: hold the connection open across queries
            to_send = '-d ris-asns list_asns=true asn_types=o sourceapp={}\n'.format(self.sourceapp)
            tn.write(to_send.encode())
            ris_asns = json.loads(tn.read_until(b'\n}\n'))
            all_asns = ris_asns['asns']['originating']
            if not all_asns:
                self.logger.warning('No ASNs in ris-asns, something went wrong.')
            else:
                self.redis_cache.sadd('asns', *all_asns)
                self.redis_cache.sadd('asns_to_lookup', *all_asns)
            tn.write(b'-k\n')

    def fix_ipv4_networks(self, networks):
        '''Because we can't have nice things.
        Some networks come without their last byte(s) (e.g. 170.254.25/24).'''
        to_return = []
        for net in networks:
            try:
                to_return.append(ip_network(net))
            except ValueError:
                # Pad the truncated address with zero octets before parsing.
                ip, mask = net.split('/')
                iplist = ip.split('.')
                iplist = iplist + ['0'] * (4 - len(iplist))
                to_return.append(ip_network('{}/{}'.format('.'.join(iplist), mask)))
        return to_return

    async def get_originating_prefixes(self):
        reader, writer = await asyncio.open_connection(self.hostname, self.port)
        writer.write(b'-k\n')
        while True:
            asn = self.redis_cache.spop('asns_to_lookup')
            if not asn:
                break
            self.logger.debug('ASN lookup: {}'.format(asn))
            to_send = '-d ris-prefixes {} list_prefixes=true types=o af=v4,v6 noise=filter sourceapp={}\n'.format(asn, self.sourceapp)
            writer.write(to_send.encode())
            data = await reader.readuntil(b'\n}\n')
            ris_prefixes = json.loads(data)
            p = self.redis_cache.pipeline()
            if ris_prefixes['prefixes']['v4']['originating']:
                self.logger.debug('{} has ipv4'.format(asn))
                fixed_networks = self.fix_ipv4_networks(ris_prefixes['prefixes']['v4']['originating'])
                p.sadd('{}|v4'.format(asn), *[str(net) for net in fixed_networks])
                total_ipv4 = sum([net.num_addresses for net in fixed_networks])
                p.set('{}|v4|ipcount'.format(asn), total_ipv4)
            if ris_prefixes['prefixes']['v6']['originating']:
                self.logger.debug('{} has ipv6'.format(asn))
                p.sadd('{}|v6'.format(asn), *ris_prefixes['prefixes']['v6']['originating'])
                total_ipv6 = sum([ip_network(prefix).num_addresses for prefix in ris_prefixes['prefixes']['v6']['originating']])
                p.set('{}|v6|ipcount'.format(asn), total_ipv6)
            p.execute()
        writer.write(b'-k\n')
        writer.close()
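The padding hack in `fix_ipv4_networks` is easy to exercise on its own. A minimal single-prefix sketch (`pad_ipv4_network` is an illustrative name, not part of the codebase), using the truncated prefix from the docstring:

```python
from ipaddress import ip_network

def pad_ipv4_network(net):
    # Same zero-padding logic as ASNLookup.fix_ipv4_networks, for one prefix.
    try:
        return ip_network(net)
    except ValueError:
        ip, mask = net.split('/')
        octets = ip.split('.')
        octets += ['0'] * (4 - len(octets))
        return ip_network('{}/{}'.format('.'.join(octets), mask))

print(pad_ipv4_network('170.254.25/24'))  # 170.254.25.0/24
print(pad_ipv4_network('10.0.0.0/8'))     # already valid, parsed as-is
```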

listimport/libs/StatsRipeText.py Normal file, 18 lines

@@ -0,0 +1,18 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging

from abc import ABC


class RIPECaching(ABC):

    def __init__(self, sourceapp: str='bgpranking-ng', loglevel: int=logging.DEBUG):
        self.sourceapp = sourceapp
        self.hostname = 'stat.ripe.net'
        self.port = 43
        self.__init_logger(loglevel)

    def __init_logger(self, loglevel):
        self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
        self.logger.setLevel(loglevel)
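Both subclasses talk to the same whois-style text service on stat.ripe.net:43: one `-d <query>` line per request, a JSON document back, and `-k` to keep the connection alive between queries. A minimal one-shot round-trip, assuming the service still answers in this format (8.8.8.8 is only an example address):

```python
import json
from telnetlib import Telnet

# Single query against the text service the RIPECaching subclasses use;
# no '-k' keepalive needed since we only send one request per connection.
with Telnet('stat.ripe.net', 43) as tn:
    tn.write(b'-d network-info 8.8.8.8 sourceapp=bgpranking-ng\n')
    print(json.loads(tn.read_until(b'\n}\n')))
```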


@@ -1,7 +0,0 @@
-{
-    "url": "http://www.dshield.org/feeds/daily_sources",
-    "vendor": "dshield",
-    "name": "daily",
-    "impact": 0.1,
-    "parser": "parsers.dshield"
-}


@@ -1,13 +1,12 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-import requests
+import aiohttp
 from dateutil import parser
 from datetime import datetime, date
 from hashlib import sha512  # Faster than sha256 on 64b machines.
 from pathlib import Path
 import logging
-import asyncio
 from pid import PidFile, PidFileError
 import json
@@ -46,13 +45,15 @@ class Fetcher():
                                              self.vendor, self.listname))
         self.logger.setLevel(loglevel)

-    def __get_last_modified(self):
-        r = requests.head(self.url)
-        if 'Last-Modified' in r.headers:
-            return parser.parse(r.headers['Last-Modified'])
+    async def __get_last_modified(self):
+        async with aiohttp.ClientSession() as session:
+            async with session.head(self.url) as r:
+                headers = r.headers
+        if 'Last-Modified' in headers:
+            return parser.parse(headers['Last-Modified'])
         return None

-    def __newer(self):
+    async def __newer(self):
         '''Check if the file available for download is newer than the one
         already downloaded by checking the `Last-Modified` header.
         Note: return False if the file containing the last header content
@@ -66,7 +67,7 @@ class Fetcher():
             self.logger.debug('No Last-Modified header available')
             return True
         self.first_fetch = False
-        last_modified = self.__get_last_modified()
+        last_modified = await self.__get_last_modified()
         if last_modified:
             self.logger.debug('Last-Modified header available')
             with last_modified_path.open('w') as f:
@@ -75,8 +76,9 @@ class Fetcher():
             self.logger.debug('No Last-Modified header available')
             return True
         with last_modified_path.open() as f:
-            last_modified_file = parser.parse(f.read())
-        last_modified = self.__get_last_modified()
+            file_content = f.read()
+        last_modified_file = parser.parse(file_content)
+        last_modified = await self.__get_last_modified()
         if not last_modified:
             # No more Last-Modified header Oo
             self.logger.warning('{}: Last-Modified header was present, isn\'t anymore!'.format(self.listname))
@@ -121,20 +123,22 @@ class Fetcher():
             return True
         return False

-    @asyncio.coroutine
     async def fetch_list(self):
         '''Fetch & store the list'''
         if not self.fetcher:
             return
         try:
             with PidFile('{}.pid'.format(self.listname), piddir=self.meta):
-                if not self.__newer():
+                if not await self.__newer():
                     return
-                r = requests.get(self.url)
-                if self.__same_as_last(r.content):
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(self.url) as r:
+                        content = await r.content.read()
+                if self.__same_as_last(content):
                     return
                 self.logger.info('Got a new file \o/')
                 with (self.directory / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
-                    f.write(r.content)
+                    f.write(content)
         except PidFileError:
             self.logger.info('Fetcher already running')
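The requests-to-aiohttp change above keeps the logic identical but makes the HTTP round-trips awaitable, so many fetchers can share one event loop. The HEAD pattern in isolation, as a runnable sketch (the URL is the dshield feed from the removed config; any HTTP endpoint works):

```python
import asyncio
import aiohttp
from dateutil import parser

async def last_modified(url):
    # Same pattern as Fetcher.__get_last_modified above.
    async with aiohttp.ClientSession() as session:
        async with session.head(url) as r:
            headers = r.headers
    if 'Last-Modified' in headers:
        return parser.parse(headers['Last-Modified'])
    return None

loop = asyncio.get_event_loop()
print(loop.run_until_complete(last_modified('http://www.dshield.org/feeds/daily_sources')))
```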


@@ -4,7 +4,6 @@
 from datetime import datetime
 from pathlib import Path
 import logging
-import asyncio
 import json
 import re
 from redis import Redis
@@ -51,7 +50,6 @@ class RawFilesParser():
         self.datetime = datetime.now()
         return self.extract_ipv4(f.getvalue())

-    @asyncio.coroutine
     async def parse_raw_files(self):
         for filepath in self.files_to_parse:
             self.logger.debug('Parsing {}, {} to go.'.format(filepath, len(self.files_to_parse) - 1))
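Removing `@asyncio.coroutine` here changes nothing at runtime: applied to an `async def` function, the decorator returns it unchanged, so it was redundant. A quick check:

```python
import asyncio

async def parse_raw_files():
    pass

# async def already yields a native coroutine function; no decorator required.
assert asyncio.iscoroutinefunction(parse_raw_files)
```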

listimport/risfetcher.py

@@ -2,39 +2,59 @@
 # -*- coding: utf-8 -*-
 import logging
+import json
 from redis import Redis
-from .libs.StatsRipe import StatsRIPE
+from .libs.StatsRipeText import RIPECaching
+import asyncio

-class RoutingInformationServiceFetcher():
+class RISPrefixLookup(RIPECaching):

-    def __init__(self, loglevel: int=logging.DEBUG):
-        self.__init_logger(loglevel)
-        self.ris_cache = Redis(host='localhost', port=6381, db=0)
-        self.logger.debug('Starting RIS fetcher')
-        self.ripe = StatsRIPE()
+    def __init__(self, sourceapp: str='bgpranking-ng', loglevel: int=logging.DEBUG):
+        super().__init__(sourceapp, loglevel)
+        self.logger.debug('Starting RIS Prefix fetcher')

-    def __init_logger(self, loglevel):
-        self.logger = logging.getLogger('{}'.format(self.__class__.__name__))
-        self.logger.setLevel(loglevel)
+    def cache_prefix(self, redis_cache, ip, network_info, prefix_overview):
+        prefix = network_info['prefix']
+        asns = network_info['asns']
+        description = prefix_overview['block']['desc']
+        if not description:
+            description = prefix_overview['block']['name']
+        p = redis_cache.pipeline()
+        for asn in asns:
+            p.hmset(ip, {'asn': asn, 'prefix': prefix, 'description': description})
+        p.expire(ip, 43200)  # 12H
+        p.execute()

-    async def fetch(self):
+    async def run(self):
+        redis_cache = Redis(host='localhost', port=6381, db=0, decode_responses=True)
+        reader, writer = await asyncio.open_connection(self.hostname, self.port)
+        writer.write(b'-k\n')
         while True:
-            ip = self.ris_cache.spop('for_ris_lookup')
-            if not ip:
-                break
-            ip = ip.decode()
-            network_info = await self.ripe.network_info(ip)
-            prefix = network_info['data']['prefix']
-            asns = network_info['data']['asns']
-            if not asns or not prefix:
+            ip = redis_cache.spop('for_ris_lookup')
+            if not ip:  # TODO: add a check against something to stop the loop
+                self.logger.debug('Nothing to lookup')
+                await asyncio.sleep(10)
+                continue
+            if redis_cache.exists(ip):
+                self.logger.debug('Already cached: {}'.format(ip))
+                continue
+            self.logger.debug('RIS lookup: {}'.format(ip))
+            to_send = '-d network-info {} sourceapp={}\n'.format(ip, self.sourceapp)
+            writer.write(to_send.encode())
+            data = await reader.readuntil(b'\n}\n')
+            network_info = json.loads(data)
+            if not network_info.get('prefix'):
                 self.logger.warning('The IP {} does not seem to be announced'.format(ip))
                 continue
-            prefix_overview = await self.ripe.prefix_overview(prefix)
-            description = prefix_overview['data']['block']['desc']
-            if not description:
-                description = prefix_overview['data']['block']['name']
-            for asn in asns:
-                self.ris_cache.hmset(ip, {'asn': asn, 'prefix': prefix,
-                                          'description': description})
+            self.logger.debug('Prefix lookup: {}'.format(ip))
+            to_send = '-d prefix-overview {} sourceapp={}\n'.format(network_info['prefix'], self.sourceapp)
+            writer.write(to_send.encode())
+            data = await reader.readuntil(b'\n}\n')
+            prefix_overview = json.loads(data)
+            self.logger.debug('RIS cache prefix info: {}'.format(ip))
+            self.cache_prefix(redis_cache, ip, network_info, prefix_overview)
+        writer.write(b'-k\n')
+        writer.close()
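The net effect of `RISPrefixLookup` is one redis hash per IP with a 12h TTL, matching the layout documented in the README hunk above. A minimal consumer sketch, assuming a local cache on port 6381 (203.0.113.1 is a placeholder address):

```python
from redis import Redis

ris_cache = Redis(host='localhost', port=6381, db=0, decode_responses=True)

info = ris_cache.hgetall('203.0.113.1')  # filled by cache_prefix, expires after 12h
if info:
    print(info['asn'], info['prefix'], info['description'])
else:
    # Not cached (yet): queue it for the RISPrefixLookup worker.
    ris_cache.sadd('for_ris_lookup', '203.0.113.1')
```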

ranking.py Executable file, 30 lines

@@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import asyncio

from listimport.initranking import ASNLookup

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO, datefmt='%I:%M:%S')


class RankingManager():

    def __init__(self, loglevel: int=logging.DEBUG):
        self.asn_fetcher = ASNLookup(loglevel=loglevel)

    async def run_fetcher(self):
        # self.asn_fetcher.get_all_asns()
        await asyncio.gather(
            self.asn_fetcher.get_originating_prefixes(),
            self.asn_fetcher.get_originating_prefixes(),
            self.asn_fetcher.get_originating_prefixes()
        )


if __name__ == '__main__':
    modules_manager = RankingManager()
    loop = asyncio.get_event_loop()
    loop.run_until_complete(modules_manager.run_fetcher())
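Gathering the same coroutine three times works because the `asns_to_lookup` redis set acts as a shared work queue: `spop` hands each ASN to exactly one of the concurrent workers, so no coordination beyond redis is needed. The pattern reduced to its core, as a runnable sketch (local redis assumed, the RIPE round-trip replaced by a sleep):

```python
import asyncio
from redis import Redis

redis_cache = Redis(host='localhost', port=6382, db=0, decode_responses=True)
redis_cache.sadd('asns_to_lookup', '1234', '5678', '9999')

async def worker(name):
    while True:
        asn = redis_cache.spop('asns_to_lookup')  # atomic: no item is handed out twice
        if not asn:
            break
        await asyncio.sleep(0.1)  # stand-in for the RIPE round-trip
        print('{} processed AS{}'.format(name, asn))

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.gather(worker('w1'), worker('w2'), worker('w3')))
```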

ris.py, 10 lines changed

@@ -3,7 +3,7 @@
 import logging
 import asyncio

-from listimport.risfetcher import RoutingInformationServiceFetcher
+from listimport.risfetcher import RISPrefixLookup

 logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                     level=logging.INFO, datefmt='%I:%M:%S')
@@ -12,10 +12,14 @@ logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
 class RISManager():

     def __init__(self, loglevel: int=logging.DEBUG):
-        self.ris_fetcher = RoutingInformationServiceFetcher(loglevel)
+        self.ris_fetcher = RISPrefixLookup(loglevel=loglevel)

     async def run_fetcher(self):
-        await asyncio.gather(self.ris_fetcher.fetch())
+        await asyncio.gather(
+            self.ris_fetcher.run(),
+            self.ris_fetcher.run(),
+            # self.ris_fetcher.run(2)
+        )

 if __name__ == '__main__':