mirror of https://github.com/MISP/PyMISP
chg: Move scrippsco2 feed generator to a sub directory
parent
c03b26a18c
commit
056cab15a0
|
@ -4,18 +4,38 @@
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
import csv
|
import csv
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import json
|
||||||
|
from uuid import uuid4
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from pymisp import MISPEvent, MISPObject, MISPTag
|
from pymisp import MISPEvent, MISPObject, MISPTag, MISPOrganisation
|
||||||
|
from pymisp.tools import feed_meta_generator
|
||||||
from keys import misp_url, misp_key, misp_verifycert
|
|
||||||
from pymisp import ExpandedPyMISP
|
|
||||||
|
|
||||||
|
|
||||||
class Scrippts:
|
class Scrippts:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, output_dir: str= 'output', org_name: str='CIRCL',
|
||||||
self.misp = ExpandedPyMISP(misp_url, misp_key, misp_verifycert)
|
org_uuid: str='55f6ea5e-2c60-40e5-964f-47a8950d210f'):
|
||||||
|
self.misp_org = MISPOrganisation()
|
||||||
|
self.misp_org.name = org_name
|
||||||
|
self.misp_org.uuid = org_uuid
|
||||||
|
|
||||||
|
self.output_dir = Path(output_dir)
|
||||||
|
self.output_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
self.data_dir = self.output_dir / 'data'
|
||||||
|
self.data_dir.mkdir(exist_ok=True)
|
||||||
|
|
||||||
|
self.scrippts_meta_file = self.output_dir / '.meta_scrippts'
|
||||||
|
self.scrippts_meta = {}
|
||||||
|
if self.scrippts_meta_file.exists():
|
||||||
|
# Format: <infofield>,<uuid>.json
|
||||||
|
with self.scrippts_meta_file.open() as f:
|
||||||
|
reader = csv.reader(f)
|
||||||
|
for row in reader:
|
||||||
|
self.scrippts_meta[row[0]] = row[1]
|
||||||
|
else:
|
||||||
|
self.scrippts_meta_file.touch()
|
||||||
|
|
||||||
def geolocation_alt(self) -> MISPObject:
|
def geolocation_alt(self) -> MISPObject:
|
||||||
# Alert, NWT, Canada
|
# Alert, NWT, Canada
|
||||||
|
@ -200,9 +220,7 @@ class Scrippts:
|
||||||
return tag
|
return tag
|
||||||
|
|
||||||
def fetch(self, url):
|
def fetch(self, url):
|
||||||
filepath = Path('scrippts') / Path(url).name
|
filepath = self.data_dir / Path(url).name
|
||||||
if filepath.exists():
|
|
||||||
return filepath
|
|
||||||
r = requests.get(url)
|
r = requests.get(url)
|
||||||
if r.status_code != 200 or r.text[0] != '"':
|
if r.status_code != 200 or r.text[0] != '"':
|
||||||
print(url)
|
print(url)
|
||||||
|
@ -211,42 +229,42 @@ class Scrippts:
|
||||||
f.write(r.text)
|
f.write(r.text)
|
||||||
return filepath
|
return filepath
|
||||||
|
|
||||||
def get_existing_event_to_update(self, infofield):
|
|
||||||
found = self.misp.search(eventinfo=infofield, pythonify=True)
|
|
||||||
if found:
|
|
||||||
event = found[0]
|
|
||||||
return event
|
|
||||||
return False
|
|
||||||
|
|
||||||
def import_all(self, stations_short_names, interval, data_type):
|
def import_all(self, stations_short_names, interval, data_type):
|
||||||
object_creator = getattr(self, f'{interval}_flask_{data_type}')
|
object_creator = getattr(self, f'{interval}_flask_{data_type}')
|
||||||
if data_type == 'co2':
|
if data_type == 'co2':
|
||||||
base_url = 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2/'
|
base_url = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2/'
|
||||||
elif data_type in ['c13', 'o18']:
|
elif data_type in ['c13', 'o18']:
|
||||||
base_url = 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic/'
|
base_url = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic/'
|
||||||
for station in stations_short_names:
|
for station in stations_short_names:
|
||||||
url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv'
|
url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv'
|
||||||
infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations'
|
infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations'
|
||||||
filepath = self.fetch(url)
|
filepath = self.fetch(url)
|
||||||
if not filepath:
|
if not filepath:
|
||||||
continue
|
continue
|
||||||
update = True
|
if infofield in self.scrippts_meta:
|
||||||
event = self.get_existing_event_to_update(infofield)
|
|
||||||
if event:
|
|
||||||
location = event.get_objects_by_name('geolocation')[0]
|
|
||||||
if not event:
|
|
||||||
event = MISPEvent()
|
event = MISPEvent()
|
||||||
|
event.load_file(str(self.output_dir / self.scrippts_meta[infofield]))
|
||||||
|
location = event.get_objects_by_name('geolocation')[0]
|
||||||
|
update = True
|
||||||
|
else:
|
||||||
|
event = MISPEvent()
|
||||||
|
event.uuid = str(uuid4())
|
||||||
event.info = infofield
|
event.info = infofield
|
||||||
|
event.Orgc = self.misp_org
|
||||||
event.add_tag(getattr(self, f'tag_{station}')())
|
event.add_tag(getattr(self, f'tag_{station}')())
|
||||||
location = getattr(self, f'geolocation_{station}')()
|
location = getattr(self, f'geolocation_{station}')()
|
||||||
event.add_object(location)
|
event.add_object(location)
|
||||||
event.add_attribute('link', f'http://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')
|
event.add_attribute('link', f'https://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')
|
||||||
update = False
|
update = False
|
||||||
|
with self.scrippts_meta_file.open('a') as f:
|
||||||
|
writer = csv.writer(f)
|
||||||
|
writer.writerow([infofield, f'{event.uuid}.json'])
|
||||||
|
|
||||||
object_creator(event, location, filepath, update)
|
object_creator(event, location, filepath, update)
|
||||||
if update:
|
feed_output = event.to_feed(with_meta=False)
|
||||||
self.misp.update_event(event)
|
with (self.output_dir / f'{event.uuid}.json').open('w') as f:
|
||||||
else:
|
# json.dump(feed_output, f, indent=2, sort_keys=True) # For testing
|
||||||
self.misp.add_event(event)
|
json.dump(feed_output, f)
|
||||||
|
|
||||||
def import_monthly_co2_all(self):
|
def import_monthly_co2_all(self):
|
||||||
to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
|
to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
|
||||||
|
@ -458,10 +476,14 @@ class Scrippts:
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
i = Scrippts()
|
output_dir = 'scrippsc02_feed'
|
||||||
|
|
||||||
|
i = Scrippts(output_dir=output_dir)
|
||||||
i.import_daily_co2_all()
|
i.import_daily_co2_all()
|
||||||
i.import_daily_c13_all()
|
i.import_daily_c13_all()
|
||||||
i.import_daily_o18_all()
|
i.import_daily_o18_all()
|
||||||
i.import_monthly_co2_all()
|
i.import_monthly_co2_all()
|
||||||
i.import_monthly_c13_all()
|
i.import_monthly_c13_all()
|
||||||
i.import_monthly_o18_all()
|
i.import_monthly_o18_all()
|
||||||
|
|
||||||
|
feed_meta_generator(Path(output_dir))
|
|
@ -474,6 +474,8 @@ class MISPEvent(AbstractMISP):
|
||||||
|
|
||||||
def _set_default(self):
|
def _set_default(self):
|
||||||
"""There are a few keys that could be set by default"""
|
"""There are a few keys that could be set by default"""
|
||||||
|
if not hasattr(self, 'published'):
|
||||||
|
self.published = True
|
||||||
if not hasattr(self, 'uuid'):
|
if not hasattr(self, 'uuid'):
|
||||||
self.uuid = str(uuid.uuid4())
|
self.uuid = str(uuid.uuid4())
|
||||||
if not hasattr(self, 'date'):
|
if not hasattr(self, 'date'):
|
||||||
|
@ -623,14 +625,14 @@ class MISPEvent(AbstractMISP):
|
||||||
else:
|
else:
|
||||||
raise PyMISPError('All the attributes have to be of type MISPObject.')
|
raise PyMISPError('All the attributes have to be of type MISPObject.')
|
||||||
|
|
||||||
def load_file(self, event_path):
|
def load_file(self, event_path, validate=False, metadata_only=False):
|
||||||
"""Load a JSON dump from a file on the disk"""
|
"""Load a JSON dump from a file on the disk"""
|
||||||
if not os.path.exists(event_path):
|
if not os.path.exists(event_path):
|
||||||
raise PyMISPError('Invalid path, unable to load the event.')
|
raise PyMISPError('Invalid path, unable to load the event.')
|
||||||
with open(event_path, 'rb') as f:
|
with open(event_path, 'rb') as f:
|
||||||
self.load(f)
|
self.load(f, validate, metadata_only)
|
||||||
|
|
||||||
def load(self, json_event, validate=False):
|
def load(self, json_event, validate=False, metadata_only=False):
|
||||||
"""Load a JSON dump from a pseudo file or a JSON string"""
|
"""Load a JSON dump from a pseudo file or a JSON string"""
|
||||||
if hasattr(json_event, 'read'):
|
if hasattr(json_event, 'read'):
|
||||||
# python2 and python3 compatible to find if we have a file
|
# python2 and python3 compatible to find if we have a file
|
||||||
|
@ -645,6 +647,9 @@ class MISPEvent(AbstractMISP):
|
||||||
event = json_event
|
event = json_event
|
||||||
if not event:
|
if not event:
|
||||||
raise PyMISPError('Invalid event')
|
raise PyMISPError('Invalid event')
|
||||||
|
if metadata_only:
|
||||||
|
event.pop('Attribute', None)
|
||||||
|
event.pop('Object', None)
|
||||||
self.from_dict(**event)
|
self.from_dict(**event)
|
||||||
if validate:
|
if validate:
|
||||||
jsonschema.validate(json.loads(self.to_json()), self.__json_schema)
|
jsonschema.validate(json.loads(self.to_json()), self.__json_schema)
|
||||||
|
@ -718,6 +723,11 @@ class MISPEvent(AbstractMISP):
|
||||||
self.publish_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('publish_timestamp')), datetime.timezone.utc)
|
self.publish_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('publish_timestamp')), datetime.timezone.utc)
|
||||||
else:
|
else:
|
||||||
self.publish_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('publish_timestamp')), UTC())
|
self.publish_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('publish_timestamp')), UTC())
|
||||||
|
if kwargs.get('sighting_timestamp'):
|
||||||
|
if sys.version_info >= (3, 3):
|
||||||
|
self.sighting_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('sighting_timestamp')), datetime.timezone.utc)
|
||||||
|
else:
|
||||||
|
self.sighting_timestamp = datetime.datetime.fromtimestamp(int(kwargs.pop('sighting_timestamp')), UTC())
|
||||||
if kwargs.get('sharing_group_id'):
|
if kwargs.get('sharing_group_id'):
|
||||||
self.sharing_group_id = int(kwargs.pop('sharing_group_id'))
|
self.sharing_group_id = int(kwargs.pop('sharing_group_id'))
|
||||||
if kwargs.get('RelatedEvent'):
|
if kwargs.get('RelatedEvent'):
|
||||||
|
@ -747,6 +757,8 @@ class MISPEvent(AbstractMISP):
|
||||||
to_return['date'] = self.date.isoformat()
|
to_return['date'] = self.date.isoformat()
|
||||||
if to_return.get('publish_timestamp'):
|
if to_return.get('publish_timestamp'):
|
||||||
to_return['publish_timestamp'] = self._datetime_to_timestamp(self.publish_timestamp)
|
to_return['publish_timestamp'] = self._datetime_to_timestamp(self.publish_timestamp)
|
||||||
|
if to_return.get('sighting_timestamp'):
|
||||||
|
to_return['sighting_timestamp'] = self._datetime_to_timestamp(self.sighting_timestamp)
|
||||||
|
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue