diff --git a/examples/import_scripptsc02.py b/examples/import_scripptsc02.py new file mode 100644 index 0000000..3c7eff0 --- /dev/null +++ b/examples/import_scripptsc02.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from dateutil.parser import parse +import csv +from pathlib import Path +import requests + +from pymisp import MISPEvent, MISPObject, MISPTag + +from keys import misp_url, misp_key, misp_verifycert +from pymisp import ExpandedPyMISP + + +class Scrippts: + + def __init__(self): + self.misp = ExpandedPyMISP(misp_url, misp_key, misp_verifycert) + + def geolocation_alt(self) -> MISPObject: + # Alert, NWT, Canada + location = MISPObject('geolocation', standalone=False) + location.add_attribute('latitude', 82.3) + location.add_attribute('longitude', 62.3) + location.add_attribute('altitude', 210) + location.add_attribute('text', 'Alert, NWT, Canada') + return location + + def tag_alt(self) -> MISPTag: + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:ALT' + return tag + + def geolocation_ptb(self): + # Point Barrow, Alaska + location = MISPObject('geolocation') + location.add_attribute('latitude', 71.3) + location.add_attribute('longitude', 156.6) + location.add_attribute('altitude', 11) + location.add_attribute('text', 'Point Barrow, Alaska') + return location + + def tag_ptb(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:PTB' + return tag + + def geolocation_stp(self) -> MISPObject: + # Station P + location = MISPObject('geolocation') + location.add_attribute('latitude', 50) + location.add_attribute('longitude', 145) + location.add_attribute('altitude', 0) + location.add_attribute('text', 'Station P') + return location + + def tag_stp(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:STP' + return tag + + def geolocation_ljo(self) -> MISPObject: + # La Jolla Pier, California + location = MISPObject('geolocation') + location.add_attribute('latitude', 32.9) + location.add_attribute('longitude', 117.3) + location.add_attribute('altitude', 10) + location.add_attribute('text', 'La Jolla Pier, California') + return location + + def tag_ljo(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:LJO' + return tag + + def geolocation_bcs(self) -> MISPObject: + # Baja California Sur, Mexico + location = MISPObject('geolocation') + location.add_attribute('latitude', 23.3) + location.add_attribute('longitude', 110.2) + location.add_attribute('altitude', 4) + location.add_attribute('text', 'Baja California Sur, Mexico') + return location + + def tag_bcs(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:BCS' + return tag + + def geolocation_mlo(self) -> MISPObject: + # Mauna Loa Observatory, Hawaii + location = MISPObject('geolocation') + location.add_attribute('latitude', 19.5) + location.add_attribute('longitude', 155.6) + location.add_attribute('altitude', 3397) + location.add_attribute('text', 'Mauna Loa Observatory, Hawaii') + return location + + def tag_mlo(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:MLO' + return tag + + def geolocation_kum(self) -> MISPObject: + # Cape Kumukahi, Hawaii + location = MISPObject('geolocation') + location.add_attribute('latitude', 19.5) + location.add_attribute('longitude', 154.8) + location.add_attribute('altitude', 3) + location.add_attribute('text', 'Cape Kumukahi, Hawaii') + return location + + def tag_kum(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:KUM' + return tag + + def geolocation_chr(self): + # Christmas Island, Fanning Island + location = MISPObject('geolocation') + location.add_attribute('latitude', 2) + location.add_attribute('longitude', 157.3) + location.add_attribute('altitude', 2) + location.add_attribute('text', 'Christmas Island, Fanning Island') + return location + + def tag_chr(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:CHR' + return tag + + def geolocation_sam(self): + # American Samoa + location = MISPObject('geolocation') + location.add_attribute('latitude', 14.2) + location.add_attribute('longitude', 170.6) + location.add_attribute('altitude', 30) + location.add_attribute('text', 'American Samoa') + return location + + def tag_sam(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:SAM' + return tag + + def geolocation_ker(self): + # Kermadec Islands, Raoul Island + location = MISPObject('geolocation') + location.add_attribute('latitude', 29.2) + location.add_attribute('longitude', 177.9) + location.add_attribute('altitude', 2) + location.add_attribute('text', 'Kermadec Islands, Raoul Island') + return location + + def tag_ker(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:KER' + return tag + + def geolocation_nzd(self): + # Baring Head, New Zealand + location = MISPObject('geolocation') + location.add_attribute('latitude', 41.4) + location.add_attribute('longitude', 174.9) + location.add_attribute('altitude', 85) + location.add_attribute('text', 'Baring Head, New Zealand') + return location + + def tag_nzd(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:NZD' + return tag + + def geolocation_psa(self): + # Palmer Station, Antarctica + location = MISPObject('geolocation') + location.add_attribute('latitude', 64.9) + location.add_attribute('longitude', 64) + location.add_attribute('altitude', 10) + location.add_attribute('text', 'Palmer Station, Antarctica') + return location + + def tag_psa(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:PSA' + return tag + + def geolocation_spo(self): + # South Pole + location = MISPObject('geolocation') + location.add_attribute('latitude', 90) + location.add_attribute('longitude', 0) + location.add_attribute('altitude', 2810) + location.add_attribute('text', 'South Pole') + return location + + def tag_spo(self): + tag = MISPTag() + tag.name = 'scrippsco2-sampling-stations:SPO' + return tag + + def fetch(self, url): + filepath = Path('scrippts') / Path(url).name + if filepath.exists(): + return filepath + r = requests.get(url) + if r.status_code != 200 or r.text[0] != '"': + print(url) + return False + with filepath.open('w') as f: + f.write(r.text) + return filepath + + def get_existing_event_to_update(self, infofield): + found = self.misp.search(eventinfo=infofield, pythonify=True) + if found: + event = found[0] + return event + return False + + def import_all(self, stations_short_names, interval, data_type): + object_creator = getattr(self, f'{interval}_flask_{data_type}') + if data_type == 'co2': + base_url = 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2/' + elif data_type in ['c13', 'o18']: + base_url = 'http://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic/' + for station in stations_short_names: + url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv' + infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations' + filepath = self.fetch(url) + if not filepath: + continue + update = True + event = self.get_existing_event_to_update(infofield) + if event: + location = event.get_objects_by_name('geolocation')[0] + if not event: + event = MISPEvent() + event.info = infofield + event.add_tag(getattr(self, f'tag_{station}')()) + location = getattr(self, f'geolocation_{station}')() + event.add_object(location) + event.add_attribute('link', f'http://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}') + update = False + object_creator(event, location, filepath, update) + if update: + self.misp.update_event(event) + else: + self.misp.add_event(event) + + def import_monthly_co2_all(self): + to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd'] + self.import_all(to_import, 'monthly', 'co2') + + def import_monthly_c13_all(self): + to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'psa', 'spo'] + self.import_all(to_import, 'monthly', 'c13') + + def import_monthly_o18_all(self): + to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo'] + self.import_all(to_import, 'monthly', 'o18') + + def import_daily_co2_all(self): + to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd'] + self.import_all(to_import, 'daily', 'co2') + + def import_daily_c13_all(self): + to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo'] + self.import_all(to_import, 'daily', 'c13') + + def import_daily_o18_all(self): + to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo'] + self.import_all(to_import, 'daily', 'o18') + + def split_data_comment(self, csv_file, update, event): + comment = '' + data = [] + with csv_file.open() as f: + for line in f: + if line[0] == '"': + if update: + continue + if '----------' in line: + event.add_attribute('comment', comment, disable_correlation=True) + comment = '' + continue + comment += line[1:-1].strip() + else: + data.append(line) + if not update: + event.add_attribute('comment', comment, disable_correlation=True) + return data + + def monthly_flask_co2(self, event, location, csv_file, update): + data = self.split_data_comment(csv_file, update, event) + + dates_already_imported = [] + if update: + # get all datetime from existing event + for obj in event.get_objects_by_name('scrippsco2-co2-monthly'): + date_attribute = obj.get_attributes_by_relation('sample-datetime')[0] + dates_already_imported.append(date_attribute.value) + + reader = csv.reader(data) + for row in reader: + if not row[0].isdigit(): + # This file has fucked up headers + continue + sample_date = parse(f'{row[0]}-{row[1]}-16T00:00:00') + if sample_date in dates_already_imported: + continue + obj = MISPObject('scrippsco2-co2-monthly', standalone=False) + obj.add_attribute('sample-datetime', sample_date) + obj.add_attribute('sample-date-excel', float(row[2])) + obj.add_attribute('sample-date-fractional', float(row[3])) + obj.add_attribute('monthly-co2', float(row[4])) + obj.add_attribute('monthly-co2-seasonal-adjustment', float(row[5])) + obj.add_attribute('monthly-co2-smoothed', float(row[6])) + obj.add_attribute('monthly-co2-smoothed-seasonal-adjustment', float(row[7])) + obj.add_reference(location, 'sampling-location') + event.add_object(obj) + + def monthly_flask_c13(self, event, location, csv_file, update): + data = self.split_data_comment(csv_file, update, event) + + dates_already_imported = [] + if update: + # get all datetime from existing event + for obj in event.get_objects_by_name('scrippsco2-c13-monthly'): + date_attribute = obj.get_attributes_by_relation('sample-datetime')[0] + dates_already_imported.append(date_attribute.value) + + reader = csv.reader(data) + for row in reader: + if not row[0].isdigit(): + # This file has fucked up headers + continue + sample_date = parse(f'{row[0]}-{row[1]}-16T00:00:00') + if sample_date in dates_already_imported: + continue + obj = MISPObject('scrippsco2-c13-monthly', standalone=False) + obj.add_attribute('sample-datetime', sample_date) + obj.add_attribute('sample-date-excel', float(row[2])) + obj.add_attribute('sample-date-fractional', float(row[3])) + obj.add_attribute('monthly-c13', float(row[4])) + obj.add_attribute('monthly-c13-seasonal-adjustment', float(row[5])) + obj.add_attribute('monthly-c13-smoothed', float(row[6])) + obj.add_attribute('monthly-c13-smoothed-seasonal-adjustment', float(row[7])) + obj.add_reference(location, 'sampling-location') + event.add_object(obj) + + def monthly_flask_o18(self, event, location, csv_file, update): + data = self.split_data_comment(csv_file, update, event) + + dates_already_imported = [] + if update: + # get all datetime from existing event + for obj in event.get_objects_by_name('scrippsco2-o18-monthly'): + date_attribute = obj.get_attributes_by_relation('sample-datetime')[0] + dates_already_imported.append(date_attribute.value) + + reader = csv.reader(data) + for row in reader: + if not row[0].isdigit(): + # This file has fucked up headers + continue + sample_date = parse(f'{row[0]}-{row[1]}-16T00:00:00') + if sample_date in dates_already_imported: + continue + obj = MISPObject('scrippsco2-o18-monthly', standalone=False) + obj.add_attribute('sample-datetime', sample_date) + obj.add_attribute('sample-date-excel', float(row[2])) + obj.add_attribute('sample-date-fractional', float(row[3])) + obj.add_attribute('monthly-o18', float(row[4])) + obj.add_attribute('monthly-o18-seasonal-adjustment', float(row[5])) + obj.add_attribute('monthly-o18-smoothed', float(row[6])) + obj.add_attribute('monthly-o18-smoothed-seasonal-adjustment', float(row[7])) + obj.add_reference(location, 'sampling-location') + event.add_object(obj) + + def daily_flask_co2(self, event, location, csv_file, update): + data = self.split_data_comment(csv_file, update, event) + + dates_already_imported = [] + if update: + # get all datetime from existing event + for obj in event.get_objects_by_name('scrippsco2-co2-daily'): + date_attribute = obj.get_attributes_by_relation('sample-datetime')[0] + dates_already_imported.append(date_attribute.value) + + reader = csv.reader(data) + for row in reader: + sample_date = parse(f'{row[0]}-{row[1]}') + if sample_date in dates_already_imported: + continue + obj = MISPObject('scrippsco2-co2-daily', standalone=False) + obj.add_attribute('sample-datetime', sample_date) + obj.add_attribute('sample-date-excel', float(row[2])) + obj.add_attribute('sample-date-fractional', float(row[3])) + obj.add_attribute('number-flask', int(row[4])) + obj.add_attribute('flag', int(row[5])) + attr = obj.add_attribute('co2-value', float(row[6])) + attr.add_tag(f'scrippsco2-fgc:{int(row[5])}') + obj.add_reference(location, 'sampling-location') + event.add_object(obj) + + def daily_flask_c13(self, event, location, csv_file, update): + data = self.split_data_comment(csv_file, update, event) + + dates_already_imported = [] + if update: + # get all datetime from existing event + for obj in event.get_objects_by_name('scrippsco2-c13-daily'): + date_attribute = obj.get_attributes_by_relation('sample-datetime')[0] + dates_already_imported.append(date_attribute.value) + + reader = csv.reader(data) + for row in reader: + sample_date = parse(f'{row[0]}-{row[1]}') + if sample_date in dates_already_imported: + continue + obj = MISPObject('scrippsco2-c13-daily', standalone=False) + obj.add_attribute('sample-datetime', sample_date) + obj.add_attribute('sample-date-excel', float(row[2])) + obj.add_attribute('sample-date-fractional', float(row[3])) + obj.add_attribute('number-flask', int(row[4])) + obj.add_attribute('flag', int(row[5])) + attr = obj.add_attribute('c13-value', float(row[6])) + attr.add_tag(f'scrippsco2-fgi:{int(row[5])}') + obj.add_reference(location, 'sampling-location') + event.add_object(obj) + + def daily_flask_o18(self, event, location, csv_file, update): + data = self.split_data_comment(csv_file, update, event) + + dates_already_imported = [] + if update: + # get all datetime from existing event + for obj in event.get_objects_by_name('scrippsco2-o18-daily'): + date_attribute = obj.get_attributes_by_relation('sample-datetime')[0] + dates_already_imported.append(date_attribute.value) + + reader = csv.reader(data) + for row in reader: + sample_date = parse(f'{row[0]}-{row[1]}') + if sample_date in dates_already_imported: + continue + obj = MISPObject('scrippsco2-o18-daily', standalone=False) + obj.add_attribute('sample-datetime', sample_date) + obj.add_attribute('sample-date-excel', float(row[2])) + obj.add_attribute('sample-date-fractional', float(row[3])) + obj.add_attribute('number-flask', int(row[4])) + obj.add_attribute('flag', int(row[5])) + attr = obj.add_attribute('o18-value', float(row[6])) + attr.add_tag(f'scrippsco2-fgi:{int(row[5])}') + obj.add_reference(location, 'sampling-location') + event.add_object(obj) + + +if __name__ == '__main__': + i = Scrippts() + i.import_daily_co2_all() + i.import_daily_c13_all() + i.import_daily_o18_all() + i.import_monthly_co2_all() + i.import_monthly_c13_all() + i.import_monthly_o18_all()