#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Build a MISP feed from the flask sampling CSV files published on scrippsco2.ucsd.edu."""

import csv
import datetime
import json
from pathlib import Path
from uuid import uuid4

import requests
from dateutil.parser import parse
from pymisp import MISPEvent, MISPObject, MISPTag, MISPOrganisation
from pymisp.tools import feed_meta_generator


class Scrippts:
    """Download per-station flask measurements and store them as MISP feed events.

    One event is kept per (station, interval, data type) combination. The
    mapping between an event's info field and its JSON file is persisted in
    ``<output_dir>/.meta_scrippts`` so that later runs update the existing
    event instead of creating a new one.
    """

    # Root of the per-station CSV files for each data type (no trailing slash).
    _BASE_URLS = {
        'co2': 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_co2',
        'c13': 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic',
        'o18': 'https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/flask_isotopic',
    }

    # Station short name -> (latitude, longitude, altitude, description).
    # NOTE(review): the coordinates carry no hemisphere sign (e.g. the South
    # Pole latitude is 90, not -90) - confirm whether consumers expect signed
    # values before changing them, existing feed events contain these numbers.
    _STATIONS = {
        'alt': (82.3, 62.3, 210, 'Alert, NWT, Canada'),
        'ptb': (71.3, 156.6, 11, 'Point Barrow, Alaska'),
        'stp': (50, 145, 0, 'Station P'),
        'ljo': (32.9, 117.3, 10, 'La Jolla Pier, California'),
        'bcs': (23.3, 110.2, 4, 'Baja California Sur, Mexico'),
        'mlo': (19.5, 155.6, 3397, 'Mauna Loa Observatory, Hawaii'),
        'kum': (19.5, 154.8, 3, 'Cape Kumukahi, Hawaii'),
        'chr': (2, 157.3, 2, 'Christmas Island, Fanning Island'),
        'sam': (14.2, 170.6, 30, 'American Samoa'),
        'ker': (29.2, 177.9, 2, 'Kermadec Islands, Raoul Island'),
        'nzd': (41.4, 174.9, 85, 'Baring Head, New Zealand'),
        'psa': (64.9, 64, 10, 'Palmer Station, Antarctica'),
        'spo': (90, 0, 2810, 'South Pole'),
    }

    def __init__(self, output_dir: str = 'output', org_name: str = 'CIRCL',
                 org_uuid: str = '55f6ea5e-2c60-40e5-964f-47a8950d210f'):
        """Prepare the output directories and load the event index.

        Args:
            output_dir: Directory receiving the feed (event JSON files, the
                ``data`` download directory and the ``.meta_scrippts`` index).
            org_name: Name of the organisation set as creator of new events.
            org_uuid: UUID of that organisation.
        """
        self.misp_org = MISPOrganisation()
        self.misp_org.name = org_name
        self.misp_org.uuid = org_uuid

        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.data_dir = self.output_dir / 'data'
        self.data_dir.mkdir(exist_ok=True)

        self.scrippts_meta_file = self.output_dir / '.meta_scrippts'
        self.scrippts_meta = {}
        if self.scrippts_meta_file.exists():
            # Format: <infofield>,<uuid>.json
            with self.scrippts_meta_file.open(newline='', encoding='utf-8') as f:
                for row in csv.reader(f):
                    if len(row) < 2:
                        # Blank or truncated line: nothing usable to index.
                        continue
                    self.scrippts_meta[row[0]] = row[1]
        else:
            self.scrippts_meta_file.touch()

    # ---------------------------------------------------------------- stations

    def _geolocation(self, station: str, **kwargs) -> MISPObject:
        """Build the geolocation object of *station* from the station table.

        Extra keyword arguments are passed to the ``MISPObject`` constructor.
        """
        latitude, longitude, altitude, text = self._STATIONS[station]
        location = MISPObject('geolocation', **kwargs)
        location.add_attribute('latitude', latitude)
        location.add_attribute('longitude', longitude)
        location.add_attribute('altitude', altitude)
        location.add_attribute('text', text)
        return location

    def _tag(self, station: str) -> MISPTag:
        """Build the sampling-station tag of *station*."""
        tag = MISPTag()
        tag.name = f'scrippsco2-sampling-stations:{station.upper()}'
        return tag

    # The public geolocation_<station> / tag_<station> methods below are looked
    # up by name in import_all(), so each station keeps its own pair.

    def geolocation_alt(self) -> MISPObject:
        """Return the geolocation object for Alert, NWT, Canada."""
        # This station is the only one historically created with
        # standalone=False; kept as is to avoid changing the generated objects.
        return self._geolocation('alt', standalone=False)

    def tag_alt(self) -> MISPTag:
        """Return the station tag for ALT."""
        return self._tag('alt')

    def geolocation_ptb(self) -> MISPObject:
        """Return the geolocation object for Point Barrow, Alaska."""
        return self._geolocation('ptb')

    def tag_ptb(self) -> MISPTag:
        """Return the station tag for PTB."""
        return self._tag('ptb')

    def geolocation_stp(self) -> MISPObject:
        """Return the geolocation object for Station P."""
        return self._geolocation('stp')

    def tag_stp(self) -> MISPTag:
        """Return the station tag for STP."""
        return self._tag('stp')

    def geolocation_ljo(self) -> MISPObject:
        """Return the geolocation object for La Jolla Pier, California."""
        return self._geolocation('ljo')

    def tag_ljo(self) -> MISPTag:
        """Return the station tag for LJO."""
        return self._tag('ljo')

    def geolocation_bcs(self) -> MISPObject:
        """Return the geolocation object for Baja California Sur, Mexico."""
        return self._geolocation('bcs')

    def tag_bcs(self) -> MISPTag:
        """Return the station tag for BCS."""
        return self._tag('bcs')

    def geolocation_mlo(self) -> MISPObject:
        """Return the geolocation object for Mauna Loa Observatory, Hawaii."""
        return self._geolocation('mlo')

    def tag_mlo(self) -> MISPTag:
        """Return the station tag for MLO."""
        return self._tag('mlo')

    def geolocation_kum(self) -> MISPObject:
        """Return the geolocation object for Cape Kumukahi, Hawaii."""
        return self._geolocation('kum')

    def tag_kum(self) -> MISPTag:
        """Return the station tag for KUM."""
        return self._tag('kum')

    def geolocation_chr(self) -> MISPObject:
        """Return the geolocation object for Christmas Island, Fanning Island."""
        return self._geolocation('chr')

    def tag_chr(self) -> MISPTag:
        """Return the station tag for CHR."""
        return self._tag('chr')

    def geolocation_sam(self) -> MISPObject:
        """Return the geolocation object for American Samoa."""
        return self._geolocation('sam')

    def tag_sam(self) -> MISPTag:
        """Return the station tag for SAM."""
        return self._tag('sam')

    def geolocation_ker(self) -> MISPObject:
        """Return the geolocation object for Kermadec Islands, Raoul Island."""
        return self._geolocation('ker')

    def tag_ker(self) -> MISPTag:
        """Return the station tag for KER."""
        return self._tag('ker')

    def geolocation_nzd(self) -> MISPObject:
        """Return the geolocation object for Baring Head, New Zealand."""
        return self._geolocation('nzd')

    def tag_nzd(self) -> MISPTag:
        """Return the station tag for NZD."""
        return self._tag('nzd')

    def geolocation_psa(self) -> MISPObject:
        """Return the geolocation object for Palmer Station, Antarctica."""
        return self._geolocation('psa')

    def tag_psa(self) -> MISPTag:
        """Return the station tag for PSA."""
        return self._tag('psa')

    def geolocation_spo(self) -> MISPObject:
        """Return the geolocation object for South Pole."""
        return self._geolocation('spo')

    def tag_spo(self) -> MISPTag:
        """Return the station tag for SPO."""
        return self._tag('spo')

    # ---------------------------------------------------------------- download

    def fetch(self, url, timeout=60):
        """Download *url* into the data directory.

        Args:
            url: Address of the CSV file; its last path component is used as
                the local file name.
            timeout: Seconds handed to ``requests.get`` as ``timeout``.

        Returns:
            The ``Path`` of the saved file, or ``False`` when the request
            failed, did not answer 200, or the body does not start with a
            double quote (the files handled here start with quoted comment
            lines, so anything else is treated as not being a data file).
        """
        filepath = self.data_dir / Path(url).name
        try:
            r = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            # Same best-effort policy as for a non-200 answer: report and skip.
            print(f'{url}: {e}')
            return False
        # startswith() also copes with an empty body, where text[0] would raise.
        if r.status_code != 200 or not r.text.startswith('"'):
            print(url)
            return False
        with filepath.open('w', encoding='utf-8') as f:
            f.write(r.text)
        return filepath

    # ------------------------------------------------------------------ import

    def import_all(self, stations_short_names, interval, data_type):
        """Fetch and import one data set for every given station.

        Args:
            stations_short_names: Iterable of lower-case station codes; each
                must have matching ``tag_<code>`` and ``geolocation_<code>``
                methods.
            interval: Interval part of the file and method names (the callers
                in this file use ``'monthly'`` and ``'daily'``).
            data_type: ``'co2'``, ``'c13'`` or ``'o18'``.

        Raises:
            AttributeError: If no ``<interval>_flask_<data_type>`` method exists.
            ValueError: If no download location is known for *data_type*.
        """
        object_creator = getattr(self, f'{interval}_flask_{data_type}')
        try:
            base_url = self._BASE_URLS[data_type]
        except KeyError:
            raise ValueError(f'No download location known for data type {data_type!r}') from None

        for station in stations_short_names:
            url = f'{base_url}/{interval}/{interval}_flask_{data_type}_{station}.csv'
            infofield = f'[{station.upper()}] {interval} average atmospheric {data_type} concentrations'
            filepath = self.fetch(url)
            if not filepath:
                continue

            update = infofield in self.scrippts_meta
            event = MISPEvent()
            if update:
                event.load_file(str(self.output_dir / self.scrippts_meta[infofield]))
                location = event.get_objects_by_name('geolocation')[0]
            else:
                event.uuid = str(uuid4())
                event.info = infofield
                event.Orgc = self.misp_org
                event.add_tag(getattr(self, f'tag_{station}')())
                location = getattr(self, f'geolocation_{station}')()
                event.add_object(location)
                event.add_attribute('link', f'https://scrippsco2.ucsd.edu/data/atmospheric_co2/{station}')

            object_creator(event, location, filepath, update)
            if update:
                # Bump the publish timestamp
                event.publish_timestamp = datetime.datetime.timestamp(datetime.datetime.now())

            event_filename = f'{event.uuid}.json'
            feed_output = event.to_feed(with_meta=False)
            with (self.output_dir / event_filename).open('w', encoding='utf-8') as f:
                # Pass indent=2, sort_keys=True for a readable dump when testing.
                json.dump(feed_output, f)

            if not update:
                # Index the event only once its file exists, so a failed import
                # never leaves an index entry pointing at a missing file.
                with self.scrippts_meta_file.open('a', newline='', encoding='utf-8') as f:
                    csv.writer(f).writerow([infofield, event_filename])
                self.scrippts_meta[infofield] = event_filename

    def import_monthly_co2_all(self):
        """Import the monthly CO2 files of all stations providing them."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
        self.import_all(to_import, 'monthly', 'co2')

    def import_monthly_c13_all(self):
        """Import the monthly C13 files of all stations providing them."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'psa', 'spo']
        self.import_all(to_import, 'monthly', 'c13')

    def import_monthly_o18_all(self):
        """Import the monthly O18 files of all stations providing them."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'monthly', 'o18')

    def import_daily_co2_all(self):
        """Import the daily CO2 files of all stations providing them."""
        to_import = ['alt', 'ptb', 'stp', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd']
        self.import_all(to_import, 'daily', 'co2')

    def import_daily_c13_all(self):
        """Import the daily C13 files of all stations providing them."""
        to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'daily', 'c13')

    def import_daily_o18_all(self):
        """Import the daily O18 files of all stations providing them."""
        to_import = ['alt', 'ptb', 'ljo', 'bcs', 'mlo', 'kum', 'chr', 'sam', 'ker', 'nzd', 'spo']
        self.import_all(to_import, 'daily', 'o18')

    # ----------------------------------------------------------------- parsing

    def split_data_comment(self, csv_file, update, event):
        """Separate the quoted comment lines of *csv_file* from its data lines.

        Lines starting with a double quote are comments. Unless *update* is
        true, they are concatenated (leading quote and final character
        dropped, then stripped) and added to *event* as ``comment``
        attributes, a new attribute being started at every line containing
        ``----------``. When updating, comment lines are ignored.

        Returns:
            The list of non-comment lines, unmodified.
        """
        pieces = []
        data = []
        with csv_file.open(encoding='utf-8') as f:
            for line in f:
                if not line.startswith('"'):
                    data.append(line)
                    continue
                if update:
                    continue
                if '----------' in line:
                    event.add_attribute('comment', ''.join(pieces), disable_correlation=True)
                    pieces = []
                    continue
                pieces.append(line[1:-1].strip())
        if not update:
            event.add_attribute('comment', ''.join(pieces), disable_correlation=True)
        return data

    @staticmethod
    def _imported_dates(event, object_name):
        """Return the set of ``sample-datetime`` values already stored in *event*."""
        return {obj.get_attributes_by_relation('sample-datetime')[0].value
                for obj in event.get_objects_by_name(object_name)}

    def _import_monthly(self, event, location, csv_file, update, data_type):
        """Add one ``scrippsco2-<data_type>-monthly`` object per new data row."""
        object_name = f'scrippsco2-{data_type}-monthly'
        data = self.split_data_comment(csv_file, update, event)
        # Only an existing event can already contain samples.
        known_dates = self._imported_dates(event, object_name) if update else set()

        for row in csv.reader(data):
            # The column header rows are not quoted and therefore end up in the
            # data; skip them (and blank lines) by requiring a numeric year.
            if not row or not row[0].isdigit():
                continue
            # Monthly values are stamped on the 16th of the month at midnight.
            sample_date = parse(f'{row[0]}-{row[1]}-16T00:00:00')
            if sample_date in known_dates:
                continue
            obj = MISPObject(object_name, standalone=False)
            obj.add_attribute('sample-datetime', sample_date)
            obj.add_attribute('sample-date-excel', float(row[2]))
            obj.add_attribute('sample-date-fractional', float(row[3]))
            obj.add_attribute(f'monthly-{data_type}', float(row[4]))
            obj.add_attribute(f'monthly-{data_type}-seasonal-adjustment', float(row[5]))
            obj.add_attribute(f'monthly-{data_type}-smoothed', float(row[6]))
            obj.add_attribute(f'monthly-{data_type}-smoothed-seasonal-adjustment', float(row[7]))
            obj.add_reference(location, 'sampling-location')
            event.add_object(obj)

    def _import_daily(self, event, location, csv_file, update, data_type, flag_namespace):
        """Add one ``scrippsco2-<data_type>-daily`` object per new data row.

        The measured value is tagged ``<flag_namespace>:<flag>`` using the
        flag column of the row.
        """
        object_name = f'scrippsco2-{data_type}-daily'
        data = self.split_data_comment(csv_file, update, event)
        # Only an existing event can already contain samples.
        known_dates = self._imported_dates(event, object_name) if update else set()

        for row in csv.reader(data):
            if not row:
                # Blank line in the file.
                continue
            sample_date = parse(f'{row[0]}-{row[1]}')
            if sample_date in known_dates:
                continue
            flag = int(row[5])
            obj = MISPObject(object_name, standalone=False)
            obj.add_attribute('sample-datetime', sample_date)
            obj.add_attribute('sample-date-excel', float(row[2]))
            obj.add_attribute('sample-date-fractional', float(row[3]))
            obj.add_attribute('number-flask', int(row[4]))
            obj.add_attribute('flag', flag)
            attr = obj.add_attribute(f'{data_type}-value', float(row[6]))
            attr.add_tag(f'{flag_namespace}:{flag}')
            obj.add_reference(location, 'sampling-location')
            event.add_object(obj)

    def monthly_flask_co2(self, event, location, csv_file, update):
        """Import a monthly CO2 file into *event*; see ``_import_monthly``."""
        self._import_monthly(event, location, csv_file, update, 'co2')

    def monthly_flask_c13(self, event, location, csv_file, update):
        """Import a monthly C13 file into *event*; see ``_import_monthly``."""
        self._import_monthly(event, location, csv_file, update, 'c13')

    def monthly_flask_o18(self, event, location, csv_file, update):
        """Import a monthly O18 file into *event*; see ``_import_monthly``."""
        self._import_monthly(event, location, csv_file, update, 'o18')

    def daily_flask_co2(self, event, location, csv_file, update):
        """Import a daily CO2 file into *event*; see ``_import_daily``."""
        self._import_daily(event, location, csv_file, update, 'co2', 'scrippsco2-fgc')

    def daily_flask_c13(self, event, location, csv_file, update):
        """Import a daily C13 file into *event*; see ``_import_daily``."""
        self._import_daily(event, location, csv_file, update, 'c13', 'scrippsco2-fgi')

    def daily_flask_o18(self, event, location, csv_file, update):
        """Import a daily O18 file into *event*; see ``_import_daily``."""
        self._import_daily(event, location, csv_file, update, 'o18', 'scrippsco2-fgi')


if __name__ == '__main__':
    output_dir = 'scrippsco2_feed'

    i = Scrippts(output_dir=output_dir)
    i.import_daily_co2_all()
    i.import_daily_c13_all()
    i.import_daily_o18_all()
    i.import_monthly_co2_all()
    i.import_monthly_c13_all()
    i.import_monthly_o18_all()

    # Regenerate the feed metadata once all events have been written.
    feed_meta_generator(Path(output_dir))