mirror of https://github.com/MISP/PyMISP
494 lines
20 KiB
Python
494 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import csv
import datetime
import json
from pathlib import Path
from typing import Dict, Iterable, List, Set, Union
from uuid import uuid4

import requests
from dateutil.parser import parse

from pymisp import MISPEvent, MISPObject, MISPTag, MISPOrganisation
from pymisp.tools import feed_meta_generator
|
|
|
|
|
|
class Scrippts:
    """Generate MISP feed events from the Scripps CO2 flask sampling CSV files.

    One event is produced per (station, interval, data type) combination and
    written as ``<uuid>.json`` in ``output_dir``.  The mapping between an
    event's info field and its file name is persisted in
    ``output_dir/.meta_scrippts`` (CSV rows: ``<infofield>,<uuid>.json``) so
    later runs update the existing event instead of creating a new one.

    NOTE(review): station coordinates are stored as the unsigned magnitudes
    used by the original code (e.g. the South Pole latitude is ``90``).
    Confirm whether consumers expect signed values before changing them.
    """

    # Seconds to wait for the remote server before giving up on a download.
    REQUEST_TIMEOUT = 30

    def __init__(self, output_dir: str = 'output', org_name: str = 'CIRCL',
                 org_uuid: str = '55f6ea5e-2c60-40e5-964f-47a8950d210f'):
        """Prepare the output tree and load the infofield -> file index.

        :param output_dir: directory receiving the event JSON files.
        :param org_name: name of the organisation set as event creator.
        :param org_uuid: UUID of the organisation set as event creator.
        """
        self.misp_org = MISPOrganisation()
        self.misp_org.name = org_name
        self.misp_org.uuid = org_uuid

        # parents=True lets callers pass a nested path that does not exist yet.
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.data_dir = self.output_dir / 'data'
        self.data_dir.mkdir(parents=True, exist_ok=True)

        self.scrippts_meta_file = self.output_dir / '.meta_scrippts'
        self.scrippts_meta: Dict[str, str] = {}
        if self.scrippts_meta_file.exists():
            # Format: <infofield>,<uuid>.json
            with self.scrippts_meta_file.open(newline='') as f:
                for row in csv.reader(f):
                    if len(row) < 2:
                        # Blank or truncated line: nothing usable to index.
                        continue
                    self.scrippts_meta[row[0]] = row[1]
        else:
            self.scrippts_meta_file.touch()

    # ------------------------------------------------------------------
    # Station helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _geolocation(latitude, longitude, altitude, text, **object_kwargs) -> MISPObject:
        """Build a ``geolocation`` object; extra kwargs go to ``MISPObject``."""
        location = MISPObject('geolocation', **object_kwargs)
        location.add_attribute('latitude', latitude)
        location.add_attribute('longitude', longitude)
        location.add_attribute('altitude', altitude)
        location.add_attribute('text', text)
        return location

    @staticmethod
    def _station_tag(station_code: str) -> MISPTag:
        """Build the sampling-station tag for an upper-case station code."""
        tag = MISPTag()
        tag.name = f'scrippsco2-sampling-stations:{station_code}'
        return tag

    def geolocation_alt(self) -> MISPObject:
        """Location of Alert, NWT, Canada."""
        # standalone=False is kept exactly as the original code passed it.
        return self._geolocation(82.3, 62.3, 210, 'Alert, NWT, Canada', standalone=False)

    def tag_alt(self) -> MISPTag:
        """Station tag for ALT."""
        return self._station_tag('ALT')

    def geolocation_ptb(self) -> MISPObject:
        """Location of Point Barrow, Alaska."""
        return self._geolocation(71.3, 156.6, 11, 'Point Barrow, Alaska')

    def tag_ptb(self) -> MISPTag:
        """Station tag for PTB."""
        return self._station_tag('PTB')

    def geolocation_stp(self) -> MISPObject:
        """Location of Station P."""
        return self._geolocation(50, 145, 0, 'Station P')

    def tag_stp(self) -> MISPTag:
        """Station tag for STP."""
        return self._station_tag('STP')

    def geolocation_ljo(self) -> MISPObject:
        """Location of La Jolla Pier, California."""
        return self._geolocation(32.9, 117.3, 10, 'La Jolla Pier, California')

    def tag_ljo(self) -> MISPTag:
        """Station tag for LJO."""
        return self._station_tag('LJO')

    def geolocation_bcs(self) -> MISPObject:
        """Location of Baja California Sur, Mexico."""
        return self._geolocation(23.3, 110.2, 4, 'Baja California Sur, Mexico')

    def tag_bcs(self) -> MISPTag:
        """Station tag for BCS."""
        return self._station_tag('BCS')

    def geolocation_mlo(self) -> MISPObject:
        """Location of Mauna Loa Observatory, Hawaii."""
        return self._geolocation(19.5, 155.6, 3397, 'Mauna Loa Observatory, Hawaii')

    def tag_mlo(self) -> MISPTag:
        """Station tag for MLO."""
        return self._station_tag('MLO')

    def geolocation_kum(self) -> MISPObject:
        """Location of Cape Kumukahi, Hawaii."""
        return self._geolocation(19.5, 154.8, 3, 'Cape Kumukahi, Hawaii')

    def tag_kum(self) -> MISPTag:
        """Station tag for KUM."""
        return self._station_tag('KUM')

    def geolocation_chr(self) -> MISPObject:
        """Location of Christmas Island, Fanning Island."""
        return self._geolocation(2, 157.3, 2, 'Christmas Island, Fanning Island')

    def tag_chr(self) -> MISPTag:
        """Station tag for CHR."""
        return self._station_tag('CHR')

    def geolocation_sam(self) -> MISPObject:
        """Location of American Samoa."""
        return self._geolocation(14.2, 170.6, 30, 'American Samoa')

    def tag_sam(self) -> MISPTag:
        """Station tag for SAM."""
        return self._station_tag('SAM')

    def geolocation_ker(self) -> MISPObject:
        """Location of Kermadec Islands, Raoul Island."""
        return self._geolocation(29.2, 177.9, 2, 'Kermadec Islands, Raoul Island')

    def tag_ker(self) -> MISPTag:
        """Station tag for KER."""
        return self._station_tag('KER')

    def geolocation_nzd(self) -> MISPObject:
        """Location of Baring Head, New Zealand."""
        return self._geolocation(41.4, 174.9, 85, 'Baring Head, New Zealand')

    def tag_nzd(self) -> MISPTag:
        """Station tag for NZD."""
        return self._station_tag('NZD')

    def geolocation_psa(self) -> MISPObject:
        """Location of Palmer Station, Antarctica."""
        return self._geolocation(64.9, 64, 10, 'Palmer Station, Antarctica')

    def tag_psa(self) -> MISPTag:
        """Station tag for PSA."""
        return self._station_tag('PSA')

    def geolocation_spo(self) -> MISPObject:
        """Location of the South Pole."""
        return self._geolocation(90, 0, 2810, 'South Pole')

    def tag_spo(self) -> MISPTag:
        """Station tag for SPO."""
        return self._station_tag('SPO')

    # ------------------------------------------------------------------
    # Download and event assembly
    # ------------------------------------------------------------------

    def fetch(self, url: str) -> Union[Path, bool]:
        """Download ``url`` into the data directory.

        :param url: address of the CSV file to download.
        :return: path of the saved file, or ``False`` (after printing the
            URL) when the response is not HTTP 200 or its body does not
            start with a double quote.
        :raises requests.RequestException: on network errors or timeout.
        """
        filepath = self.data_dir / Path(url).name
        # Without a timeout a stalled server would block the run forever.
        r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # startswith() also copes with an empty body, where [0] would raise.
        if r.status_code != 200 or not r.text.startswith('"'):
            print(url)
            return False
        # Explicit encoding so writing and re-reading agree on every platform.
        with filepath.open('w', encoding='utf-8') as f:
            f.write(r.text)
        return filepath

    def import_all(self, stations_short_names: Iterable[str], interval: str, data_type: str) -> None:
        """Fetch and import one data set for every listed station.

        :param stations_short_names: lower-case station codes; each must have
            matching ``tag_<code>`` and ``geolocation_<code>`` methods.
        :param interval: ``'daily'`` or ``'monthly'``.
        :param data_type: ``'co2'``, ``'c13'`` or ``'o18'``.
        :raises AttributeError: if no ``<interval>_flask_<data_type>`` method exists.
        :raises ValueError: if ``data_type`` has no known download location.
        """
        object_creator = getattr(self, f'{interval}_flask_{data_type}')
        if data_type == 'co2':
            base_url = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2'
        elif data_type in ('c13', 'o18'):
            base_url = 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic'
        else:
            raise ValueError(f'Unsupported data type: {data_type!r}')

        for station in stations_short_names:
            url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv'
            infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations'
            filepath = self.fetch(url)
            if not filepath:
                continue

            update = infofield in self.scrippts_meta
            event = MISPEvent()
            if update:
                event.load_file(str(self.output_dir / self.scrippts_meta[infofield]))
                location = event.get_objects_by_name('geolocation')[0]
            else:
                event.uuid = str(uuid4())
                event.info = infofield
                event.Orgc = self.misp_org
                event.add_tag(getattr(self, f'tag_{station}')())
                location = getattr(self, f'geolocation_{station}')()
                event.add_object(location)
                event.add_attribute('link', f'https://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')

            object_creator(event, location, filepath, update)
            if update:
                # Bump the publish timestamp
                event.publish_timestamp = datetime.datetime.now().timestamp()

            event_filename = f'{event.uuid}.json'
            feed_output = event.to_feed(with_meta=False)
            with (self.output_dir / event_filename).open('w') as f:
                json.dump(feed_output, f)

            if not update:
                # Record the event only once its file exists, so a failed
                # import never leaves an index entry pointing at nothing.
                with self.scrippts_meta_file.open('a', newline='') as f:
                    csv.writer(f).writerow([infofield, event_filename])
                # Keep the in-memory index in sync so a second import on the
                # same instance updates this event instead of duplicating it.
                self.scrippts_meta[infofield] = event_filename

    def import_monthly_co2_all(self) -> None:
        """Import the monthly CO2 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
        self.import_all(to_import, 'monthly', 'co2')

    def import_monthly_c13_all(self) -> None:
        """Import the monthly C13 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'psa', 'spo']
        self.import_all(to_import, 'monthly', 'c13')

    def import_monthly_o18_all(self) -> None:
        """Import the monthly O18 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'monthly', 'o18')

    def import_daily_co2_all(self) -> None:
        """Import the daily CO2 series of every station providing it."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
        self.import_all(to_import, 'daily', 'co2')

    def import_daily_c13_all(self) -> None:
        """Import the daily C13 series of every station providing it."""
        to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'daily', 'c13')

    def import_daily_o18_all(self) -> None:
        """Import the daily O18 series of every station providing it."""
        to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'daily', 'o18')

    # ------------------------------------------------------------------
    # CSV parsing
    # ------------------------------------------------------------------

    def split_data_comment(self, csv_file: Path, update: bool, event: MISPEvent) -> List[str]:
        """Separate the quoted preamble of a downloaded file from its data.

        Lines starting with a double quote are preamble.  Unless ``update``
        is true, the preamble is attached to ``event`` as ``comment``
        attributes, one per section delimited by a dashed separator line.

        :param csv_file: path of the downloaded file.
        :param update: when true, the preamble is skipped entirely.
        :param event: event receiving the comment attributes.
        :return: the raw, unparsed data lines.
        """
        comment = ''
        data = []
        with csv_file.open(encoding='utf-8') as f:
            for line in f:
                if not line.startswith('"'):
                    data.append(line)
                    continue
                if update:
                    continue
                if '----------' in line:
                    event.add_attribute('comment', comment, disable_correlation=True)
                    comment = ''
                    continue
                # NOTE(review): this drops the first and the last character of
                # the line (last is presumably the newline) - confirm whether
                # a closing quote is meant to be removed as well.
                comment += line[1:-1].strip()
        if not update:
            event.add_attribute('comment', comment, disable_correlation=True)
        return data

    @staticmethod
    def _imported_dates(event: MISPEvent, object_name: str) -> Set:
        """Collect the ``sample-datetime`` values of the event's objects."""
        return {obj.get_attributes_by_relation('sample-datetime')[0].value
                for obj in event.get_objects_by_name(object_name)}

    def _import_monthly(self, event: MISPEvent, location: MISPObject, csv_file: Path,
                        update: bool, data_type: str) -> None:
        """Add one ``scrippsco2-<data_type>-monthly`` object per new data row."""
        object_name = f'scrippsco2-{data_type}-monthly'
        data = self.split_data_comment(csv_file, update, event)
        # A set keeps the duplicate check constant-time on long series.
        known_dates = self._imported_dates(event, object_name) if update else set()

        for row in csv.reader(data):
            if not row or not row[0].isdigit():
                # Blank line or header row (some files carry malformed
                # headers): not a data row.
                continue
            # Monthly samples carry no day; the 16th is used, as before.
            sample_date = parse(f'{row[0]}-{row[1]}-16T00:00:00')
            if sample_date in known_dates:
                continue
            obj = MISPObject(object_name, standalone=False)
            obj.add_attribute('sample-datetime', sample_date)
            obj.add_attribute('sample-date-excel', float(row[2]))
            obj.add_attribute('sample-date-fractional', float(row[3]))
            obj.add_attribute(f'monthly-{data_type}', float(row[4]))
            obj.add_attribute(f'monthly-{data_type}-seasonal-adjustment', float(row[5]))
            obj.add_attribute(f'monthly-{data_type}-smoothed', float(row[6]))
            obj.add_attribute(f'monthly-{data_type}-smoothed-seasonal-adjustment', float(row[7]))
            obj.add_reference(location, 'sampling-location')
            event.add_object(obj)

    def _import_daily(self, event: MISPEvent, location: MISPObject, csv_file: Path,
                      update: bool, data_type: str, flag_namespace: str) -> None:
        """Add one ``scrippsco2-<data_type>-daily`` object per new data row.

        ``flag_namespace`` is the prefix of the tag put on the measured value.
        """
        object_name = f'scrippsco2-{data_type}-daily'
        data = self.split_data_comment(csv_file, update, event)
        known_dates = self._imported_dates(event, object_name) if update else set()

        for row in csv.reader(data):
            if not row:
                # Blank line: nothing to import.
                continue
            sample_date = parse(f'{row[0]}-{row[1]}')
            if sample_date in known_dates:
                continue
            flag = int(row[5])
            obj = MISPObject(object_name, standalone=False)
            obj.add_attribute('sample-datetime', sample_date)
            obj.add_attribute('sample-date-excel', float(row[2]))
            obj.add_attribute('sample-date-fractional', float(row[3]))
            obj.add_attribute('number-flask', int(row[4]))
            obj.add_attribute('flag', flag)
            attr = obj.add_attribute(f'{data_type}-value', float(row[6]))
            attr.add_tag(f'{flag_namespace}:{flag}')
            obj.add_reference(location, 'sampling-location')
            event.add_object(obj)

    def monthly_flask_co2(self, event, location, csv_file, update):
        """Import monthly CO2 rows of ``csv_file`` into ``event``."""
        self._import_monthly(event, location, csv_file, update, 'co2')

    def monthly_flask_c13(self, event, location, csv_file, update):
        """Import monthly C13 rows of ``csv_file`` into ``event``."""
        self._import_monthly(event, location, csv_file, update, 'c13')

    def monthly_flask_o18(self, event, location, csv_file, update):
        """Import monthly O18 rows of ``csv_file`` into ``event``."""
        self._import_monthly(event, location, csv_file, update, 'o18')

    def daily_flask_co2(self, event, location, csv_file, update):
        """Import daily CO2 rows of ``csv_file`` into ``event``."""
        self._import_daily(event, location, csv_file, update, 'co2', 'scrippsco2-fgc')

    def daily_flask_c13(self, event, location, csv_file, update):
        """Import daily C13 rows of ``csv_file`` into ``event``."""
        self._import_daily(event, location, csv_file, update, 'c13', 'scrippsco2-fgi')

    def daily_flask_o18(self, event, location, csv_file, update):
        """Import daily O18 rows of ``csv_file`` into ``event``."""
        self._import_daily(event, location, csv_file, update, 'o18', 'scrippsco2-fgi')
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: import every data set, then build the feed metadata.
    output_dir = 'scrippsco2_feed'

    importer = Scrippts(output_dir=output_dir)
    # Same order as before: all daily series first, then all monthly ones.
    for interval in ('daily', 'monthly'):
        for data_type in ('co2', 'c13', 'o18'):
            getattr(importer, f'import_{interval}_{data_type}_all')()

    feed_meta_generator(Path(output_dir))
|