2020-02-29 01:33:03 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
from csv import DictReader
|
2020-03-24 13:25:41 +01:00
|
|
|
from pymisp import MISPEvent, MISPOrganisation, PyMISP, MISPObject
|
2020-02-29 01:33:03 +01:00
|
|
|
from datetime import datetime
|
|
|
|
from dateutil.parser import parse
|
|
|
|
import json
|
|
|
|
from pymisp.tools import feed_meta_generator
|
2020-02-29 02:10:16 +01:00
|
|
|
from io import BytesIO
|
2020-03-24 13:25:41 +01:00
|
|
|
from collections import defaultdict
|
2020-02-29 01:33:03 +01:00
|
|
|
|
|
|
|
make_feed = False
|
|
|
|
|
2020-03-24 13:25:41 +01:00
|
|
|
aggregate_by_country = True
|
|
|
|
|
2020-02-29 01:33:03 +01:00
|
|
|
path = Path('/home/raphael/gits/COVID-19/csse_covid_19_data/csse_covid_19_daily_reports/')
|
|
|
|
|
|
|
|
|
2020-03-24 13:25:41 +01:00
|
|
|
def get_country_region(row):
|
|
|
|
if 'Country/Region' in row:
|
|
|
|
return row['Country/Region']
|
|
|
|
elif 'Country_Region' in row:
|
|
|
|
return row['Country_Region']
|
|
|
|
else:
|
|
|
|
print(p, row.keys())
|
|
|
|
raise Exception()
|
|
|
|
|
|
|
|
|
|
|
|
def get_last_update(row):
|
|
|
|
if 'Last_Update' in row:
|
|
|
|
return parse(row['Last_Update'])
|
|
|
|
elif 'Last Update' in row:
|
|
|
|
return parse(row['Last Update'])
|
|
|
|
else:
|
|
|
|
print(p, row.keys())
|
|
|
|
raise Exception()
|
|
|
|
|
|
|
|
|
|
|
|
def add_detailed_object(obj, row):
|
|
|
|
if 'Province/State' in row:
|
|
|
|
if row['Province/State']:
|
|
|
|
obj.add_attribute('province-state', row['Province/State'])
|
|
|
|
elif '\ufeffProvince/State' in row:
|
|
|
|
if row['\ufeffProvince/State']:
|
|
|
|
obj.add_attribute('province-state', row['\ufeffProvince/State'])
|
|
|
|
elif 'Province_State' in row:
|
|
|
|
if row['Province_State']:
|
|
|
|
obj.add_attribute('province-state', row['Province_State'])
|
|
|
|
else:
|
|
|
|
print(p, row.keys())
|
|
|
|
raise Exception()
|
|
|
|
|
|
|
|
obj.add_attribute('country-region', get_country_region(row))
|
|
|
|
|
|
|
|
obj.add_attribute('update', get_last_update(row))
|
|
|
|
|
|
|
|
if 'Lat' in row:
|
|
|
|
obj.add_attribute('latitude', row['Lat'])
|
|
|
|
|
|
|
|
if 'Long_' in row:
|
|
|
|
obj.add_attribute('longitude', row['Long_'])
|
|
|
|
elif 'Long' in row:
|
|
|
|
obj.add_attribute('longitude', row['Long'])
|
|
|
|
|
|
|
|
if row['Confirmed']:
|
|
|
|
obj.add_attribute('confirmed', int(row['Confirmed']))
|
|
|
|
if row['Deaths']:
|
|
|
|
obj.add_attribute('death', int(row['Deaths']))
|
|
|
|
if row['Recovered']:
|
|
|
|
obj.add_attribute('recovered', int(row['Recovered']))
|
|
|
|
if 'Active' in row and row['Active']:
|
|
|
|
obj.add_attribute('active', int(row['Active']))
|
|
|
|
|
|
|
|
|
|
|
|
def country_aggregate(aggregate, row):
|
|
|
|
c = get_country_region(row)
|
|
|
|
if c not in aggregate:
|
|
|
|
aggregate[c] = defaultdict(active=0, death=0, recovered=0, confirmed=0, update=datetime.fromtimestamp(0))
|
|
|
|
if row['Confirmed']:
|
|
|
|
aggregate[c]['confirmed'] += int(row['Confirmed'])
|
|
|
|
if row['Deaths']:
|
|
|
|
aggregate[c]['death'] += int(row['Deaths'])
|
|
|
|
if row['Recovered']:
|
|
|
|
aggregate[c]['recovered'] += int(row['Recovered'])
|
|
|
|
if 'Active' in row and row['Active']:
|
|
|
|
aggregate[c]['active'] += int(row['Active'])
|
|
|
|
|
|
|
|
update = get_last_update(row)
|
|
|
|
if update > aggregate[c]['update']:
|
|
|
|
aggregate[c]['update'] = update
|
|
|
|
|
|
|
|
|
2020-02-29 01:33:03 +01:00
|
|
|
if make_feed:
|
|
|
|
org = MISPOrganisation()
|
|
|
|
org.name = 'CIRCL'
|
|
|
|
org.uuid = "55f6ea5e-2c60-40e5-964f-47a8950d210f"
|
|
|
|
else:
|
|
|
|
from covid_key import url, key
|
|
|
|
misp = PyMISP(url, key)
|
|
|
|
|
|
|
|
for p in path.glob('**/*.csv'):
|
|
|
|
d = datetime.strptime(p.name[:-4], '%m-%d-%Y').date()
|
|
|
|
event = MISPEvent()
|
2020-03-24 13:25:41 +01:00
|
|
|
if aggregate_by_country:
|
|
|
|
event.info = f"[{d.isoformat()}] CSSE COVID-19 daily report"
|
|
|
|
else:
|
|
|
|
event.info = f"[{d.isoformat()}] CSSE COVID-19 detailed daily report"
|
2020-02-29 01:33:03 +01:00
|
|
|
event.date = d
|
2020-02-29 02:10:16 +01:00
|
|
|
event.distribution = 3
|
|
|
|
event.add_tag('tlp:white')
|
2020-02-29 01:33:03 +01:00
|
|
|
if make_feed:
|
|
|
|
event.orgc = org
|
|
|
|
else:
|
|
|
|
e = misp.search(eventinfo=event.info, metadata=True, pythonify=True)
|
|
|
|
if e:
|
|
|
|
# Already added.
|
|
|
|
continue
|
2020-02-29 02:10:16 +01:00
|
|
|
event.add_attribute('attachment', p.name, data=BytesIO(p.open('rb').read()))
|
|
|
|
event.add_attribute('link', f'https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports/{p.name}', comment='Source')
|
2020-03-24 13:25:41 +01:00
|
|
|
if aggregate_by_country:
|
|
|
|
aggregate = defaultdict()
|
2020-02-29 01:33:03 +01:00
|
|
|
with p.open() as f:
|
|
|
|
reader = DictReader(f)
|
|
|
|
for row in reader:
|
2020-03-24 13:25:41 +01:00
|
|
|
if aggregate_by_country:
|
|
|
|
country_aggregate(aggregate, row)
|
2020-02-29 01:33:03 +01:00
|
|
|
else:
|
2020-03-24 13:25:41 +01:00
|
|
|
obj = MISPObject(name='covid19-csse-daily-report')
|
|
|
|
add_detailed_object(obj, row)
|
|
|
|
event.add_object(obj)
|
|
|
|
|
|
|
|
if aggregate_by_country:
|
|
|
|
for country, values in aggregate.items():
|
|
|
|
obj = event.add_object(name='covid19-csse-daily-report', standalone=False)
|
|
|
|
obj.add_attribute('country-region', country)
|
|
|
|
obj.add_attribute('update', values['update'])
|
|
|
|
obj.add_attribute('confirmed', values['confirmed'])
|
|
|
|
obj.add_attribute('death', values['death'])
|
|
|
|
obj.add_attribute('recovered', values['recovered'])
|
|
|
|
obj.add_attribute('active', values['active'])
|
|
|
|
|
2020-02-29 01:33:03 +01:00
|
|
|
if make_feed:
|
|
|
|
with (Path('output') / f'{event.uuid}.json').open('w') as _w:
|
|
|
|
json.dump(event.to_feed(), _w)
|
|
|
|
else:
|
2020-03-20 09:53:35 +01:00
|
|
|
event = misp.add_event(event)
|
|
|
|
misp.publish(event)
|
2020-02-29 01:33:03 +01:00
|
|
|
|
|
|
|
if make_feed:
|
|
|
|
feed_meta_generator(Path('output'))
|