chg: Move scrippsco2 feed generator to a sub directory

pull/511/head
Raphaël Vinot 2019-12-10 16:39:24 +01:00
parent c03b26a18c
commit 056cab15a0
2 changed files with 66 additions and 32 deletions

@@ -4,18 +4,38 @@
 from dateutil.parser import parse
 import csv
 from pathlib import Path
+import json
+from uuid import uuid4
 import requests
-from pymisp import MISPEvent, MISPObject, MISPTag
-from keys import misp_url, misp_key, misp_verifycert
-from pymisp import ExpandedPyMISP
+from pymisp import MISPEvent, MISPObject, MISPTag, MISPOrganisation
+from pymisp.tools import feed_meta_generator
 class Scrippts:
-    def __init__(self):
-        self.misp = ExpandedPyMISP(misp_url, misp_key, misp_verifycert)
+    def __init__(self, output_dir: str= 'output', org_name: str='CIRCL',
+                 org_uuid: str='55f6ea5e-2c60-40e5-964f-47a8950d210f'):
+        self.misp_org = MISPOrganisation()
+        self.misp_org.name = org_name
+        self.misp_org.uuid = org_uuid
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(exist_ok=True)
+        self.data_dir = self.output_dir / 'data'
+        self.data_dir.mkdir(exist_ok=True)
+        self.scrippts_meta_file = self.output_dir / '.meta_scrippts'
+        self.scrippts_meta = {}
+        if self.scrippts_meta_file.exists():
+            # Format: <infofield>,<uuid>.json
+            with self.scrippts_meta_file.open() as f:
+                reader = csv.reader(f)
+                for row in reader:
+                    self.scrippts_meta[row[0]] = row[1]
+        else:
+            self.scrippts_meta_file.touch()
     def geolocation_alt(self) -> MISPObject:
         # Alert, NWT, Canada
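For reference, the .meta_scrippts bookkeeping file read above is a plain CSV mapping an event's info field to the JSON file that stores it; a line would look roughly like the following (station and interval values come from the script, the UUID is a made-up placeholder):

[MLO] monthly average atmospheric co2 concentrations,00000000-0000-0000-0000-000000000000.json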
@@ -200,9 +220,7 @@ class Scrippts:
         return tag
     def fetch(self, url):
-        filepath = Path('scrippts') / Path(url).name
-        if filepath.exists():
-            return filepath
+        filepath = self.data_dir / Path(url).name
         r = requests.get(url)
         if r.status_code != 200 or r.text[0] != '"':
             print(url)
@@ -211,42 +229,42 @@ class Scrippts:
             f.write(r.text)
         return filepath
-    def get_existing_event_to_update(self, infofield):
-        found = self.misp.search(eventinfo=infofield, pythonify=True)
-        if found:
-            event = found[0]
-            return event
-        return False
     def import_all(self, stations_short_names, interval, data_type):
         object_creator = getattr(self, f'{interval}_flask_{data_type}')
         if data_type == 'co2':
-            base_url = 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2/'
+            base_url = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2/'
         elif data_type in ['c13', 'o18']:
-            base_url = 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic/'
+            base_url = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic/'
         for station in stations_short_names:
             url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv'
             infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations'
             filepath = self.fetch(url)
             if not filepath:
                 continue
-            update = True
-            event = self.get_existing_event_to_update(infofield)
-            if event:
-                location = event.get_objects_by_name('geolocation')[0]
-            if not event:
+            if infofield in self.scrippts_meta:
                 event = MISPEvent()
+                event.load_file(str(self.output_dir / self.scrippts_meta[infofield]))
+                location = event.get_objects_by_name('geolocation')[0]
+                update = True
+            else:
+                event = MISPEvent()
+                event.uuid = str(uuid4())
                 event.info = infofield
+                event.Orgc = self.misp_org
                 event.add_tag(getattr(self, f'tag_{station}')())
                 location = getattr(self, f'geolocation_{station}')()
                 event.add_object(location)
-                event.add_attribute('link', f'http://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')
+                event.add_attribute('link', f'https://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')
                 update = False
+                with self.scrippts_meta_file.open('a') as f:
+                    writer = csv.writer(f)
+                    writer.writerow([infofield, f'{event.uuid}.json'])
             object_creator(event, location, filepath, update)
-            if update:
-                self.misp.update_event(event)
-            else:
-                self.misp.add_event(event)
+            feed_output = event.to_feed(with_meta=False)
+            with (self.output_dir / f'{event.uuid}.json').open('w') as f:
+                # json.dump(feed_output, f, indent=2, sort_keys=True)  # For testing
+                json.dump(feed_output, f)
@@ -458,10 +476,14 @@ class Scrippts:
 if __name__ == '__main__':
-    i = Scrippts()
+    output_dir = 'scrippsc02_feed'
+    i = Scrippts(output_dir=output_dir)
     i.import_daily_co2_all()
     i.import_daily_c13_all()
     i.import_daily_o18_all()
     i.import_monthly_co2_all()
     i.import_monthly_c13_all()
     i.import_monthly_o18_all()
+    feed_meta_generator(Path(output_dir))
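A minimal consumer sketch for the directory this now produces, assuming the script above has been run so that scrippsc02_feed/ holds one <uuid>.json feed dump per event, and assuming feed_meta_generator writes its manifest alongside them:

from pathlib import Path

from pymisp import MISPEvent

feed_dir = Path('scrippsc02_feed')  # output_dir used in __main__ above

for event_file in sorted(feed_dir.glob('*.json')):
    if event_file.name == 'manifest.json':
        continue  # assumed feed metadata written by feed_meta_generator, not an event dump
    event = MISPEvent()
    event.load_file(str(event_file))  # the to_feed() dumps written above load back this way
    print(event.uuid, event.info, len(event.objects))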


@@ -474,6 +474,8 @@ class MISPEvent(AbstractMISP):
     def _set_default(self):
         """There are a few keys that could be set by default"""
+        if not hasattr(self, 'published'):
+            self.published = True
         if not hasattr(self, 'uuid'):
             self.uuid = str(uuid.uuid4())
         if not hasattr(self, 'date'):
@@ -623,14 +625,14 @@ class MISPEvent(AbstractMISP):
         else:
             raise PyMISPError('All the attributes have to be of type MISPObject.')
-    def load_file(self, event_path):
+    def load_file(self, event_path, validate=False, metadata_only=False):
         """Load a JSON dump from a file on the disk"""
         if not os.path.exists(event_path):
             raise PyMISPError('Invalid path, unable to load the event.')
         with open(event_path, 'rb') as f:
-            self.load(f)
+            self.load(f, validate, metadata_only)
-    def load(self, json_event, validate=False):
+    def load(self, json_event, validate=False, metadata_only=False):
         """Load a JSON dump from a pseudo file or a JSON string"""
         if hasattr(json_event, 'read'):
             # python2 and python3 compatible to find if we have a file
@@ -645,6 +647,9 @@ class MISPEvent(AbstractMISP):
             event = json_event
         if not event:
             raise PyMISPError('Invalid event')
+        if metadata_only:
+            event.pop('Attribute', None)
+            event.pop('Object', None)
         self.from_dict(**event)
         if validate:
             jsonschema.validate(json.loads(self.to_json()), self.__json_schema)
@@ -718,6 +723,11 @@ class MISPEvent(AbstractMISP):
                 self.publish_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('publish_timestamp')), datetime.timezone.utc)
             else:
                 self.publish_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('publish_timestamp')), UTC())
+        if kwargs.get('sighting_timestamp'):
+            if sys.version_info >= (3, 3):
+                self.sighting_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('sighting_timestamp')), datetime.timezone.utc)
+            else:
+                self.sighting_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('sighting_timestamp')), UTC())
         if kwargs.get('sharing_group_id'):
             self.sharing_group_id = int(kwargs.pop('sharing_group_id'))
         if kwargs.get('RelatedEvent'):
@@ -747,6 +757,8 @@ class MISPEvent(AbstractMISP):
             to_return['date'] = self.date.isoformat()
         if to_return.get('publish_timestamp'):
             to_return['publish_timestamp'] = self._datetime_to_timestamp(self.publish_timestamp)
+        if to_return.get('sighting_timestamp'):
+            to_return['sighting_timestamp'] = self._datetime_to_timestamp(self.sighting_timestamp)
         return to_return
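A small self-contained sketch of the new metadata_only flag on MISPEvent.load_file() / load(); the file name and event content are illustrative, and the dump is written with to_json() so the Attribute key sits at the top level, which is what the flag strips:

import tempfile
from pathlib import Path

from pymisp import MISPEvent

# Build a tiny event dump on disk (illustrative content only).
event = MISPEvent()
event.info = 'metadata_only demo'
event.add_attribute('ip-dst', '198.51.100.1')

dump = Path(tempfile.mkdtemp()) / 'event.json'
dump.write_text(event.to_json())

# Reload only the metadata: Attribute/Object are popped before parsing.
meta = MISPEvent()
meta.load_file(str(dump), metadata_only=True)
print(meta.info, len(meta.attributes))  # -> metadata_only demo 0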