MITRE galaxy regeneration + updated migration script

2018-12-29 18:14:47 +01:00 · 2018-12-29 18:14:47 +01:00 · 50c817a1fd
parent 3bf1fc5f9b 1a65dfb9f4
commit 50c817a1fd
6 changed files with 51096 additions and 9212 deletions
--- a/clusters/mitre-attack-pattern.json
+++ b/clusters/mitre-attack-pattern.json
--- a/clusters/mitre-course-of-action.json
+++ b/clusters/mitre-course-of-action.json
--- a/clusters/mitre-intrusion-set.json
+++ b/clusters/mitre-intrusion-set.json
--- a/clusters/mitre-malware.json
+++ b/clusters/mitre-malware.json
--- a/clusters/mitre-tool.json
+++ b/clusters/mitre-tool.json
--- a/tools/mitre-cti/v2.0/create_mitre-galaxy.py
+++ b/tools/mitre-cti/v2.0/create_mitre-galaxy.py
@ -0,0 +1,162 @@
+#!/usr/bin/env python3
+import json
+import re
+import os
+import argparse
+
+parser = argparse.ArgumentParser(description='Create a couple galaxy/cluster with cti\'s intrusion-sets\nMust be in the mitre/cti/enterprise-attack/intrusion-set folder')
+parser.add_argument("-p", "--path", required=True, help="Path of the mitre/cti folder")
+
+args = parser.parse_args()
+
+values = []
+misp_dir = '../../../'
+
+
+domains = ['enterprise-attack', 'mobile-attack', 'pre-attack']
+types = ['attack-pattern', 'course-of-action', 'intrusion-set', 'malware', 'tool']
+all_data = {}  # variable that will contain everything
+
+# read in existing data
+# THIS IS FOR MIGRATION - reading the data from the enterprise-attack, mobile-attack, pre-attack
+# first build a data set of the MISP Galaxy ATT&CK elements by using the UUID as reference, this speeds up lookups later on.
+# at the end we will convert everything again to separate datasets
+all_data_uuid = {}
+for domain in domains:
+    for t in types:
+        fname = os.path.join(misp_dir, 'clusters', 'mitre-{}-{}.json'.format(domain, t))
+        if os.path.exists(fname):
+            # print("##### {}".format(fname))
+            with open(fname) as f:
+                file_data = json.load(f)
+            # print(file_data)
+            for value in file_data['values']:
+                if value['uuid'] in all_data_uuid:
+                    # exit("ERROR: Something is really wrong, we seem to have duplicates.")
+                    # if it already exists we need to copy over all the data manually to merge it
+                    # on the other hand, from a manual analysis it looks like it's mostly the relations that are different
+                    # so now we will just copy over the relationships
+                    # actually, at time of writing the code below results in no change as the new items always contained more than the previously seen items
+                    value_orig = all_data_uuid[value['uuid']]
+                    if 'related' in value_orig:
+                        for related_item in value_orig['related']:
+                            if related_item not in value['related']:
+                                value['related'].append(related_item)
+                all_data_uuid[value['uuid']] = value
+
+# THIS IS FOR NORMAL OPERATIONS - reading from the very old and new models - one model per type
+# FIXME implement this (copy paste above or put above in function and call function)
+
+
+# now load the MITRE ATT&CK
+for domain in domains:
+    attack_dir = os.path.join(args.path, domain)
+    if not os.path.exists(attack_dir):
+        exit("ERROR: MITRE ATT&CK folder incorrect")
+
+    with open(os.path.join(attack_dir, domain + '.json')) as f:
+        attack_data = json.load(f)
+
+    for item in attack_data['objects']:
+        if item['type'] not in types:
+            continue
+
+        # print(json.dumps(item, indent=2, sort_keys=True, ensure_ascii=False))
+        try:
+            # build the new data structure
+            value = {}
+            uuid = re.search('--(.*)$', item['id']).group(0)[2:]
+            # item exist already in the all_data set
+            update = False
+            if uuid in all_data_uuid:
+                value = all_data_uuid[uuid]
+
+            if 'description' in item:
+                value['description'] = item['description']
+            value['value'] = item['name'] + ' - ' + item['external_references'][0]['external_id']
+            value['meta'] = {}
+            value['meta']['refs'] = []
+            value['uuid'] = re.search('--(.*)$', item['id']).group(0)[2:]
+
+            if 'aliases' in item:
+                value['meta']['synonyms'] = item['aliases']
+            if 'x_mitre_aliases' in item:
+                value['meta']['synonyms'] = item['x_mitre_aliases']
+
+            for reference in item['external_references']:
+                if 'url' in reference and reference['url'] not in value['meta']['refs']:
+                    value['meta']['refs'].append(reference['url'])
+                if 'external_id' in reference:
+                    value['meta']['external_id'] = reference['external_id']
+
+            if 'kill_chain_phases' in item:   # many (but not all) attack-patterns have this
+                value['meta']['kill_chain'] = []
+                for killchain in item['kill_chain_phases']:
+                    value['meta']['kill_chain'].append(killchain['kill_chain_name'] + ':' + killchain['phase_name'])
+            if 'x_mitre_data_sources' in item:
+                value['meta']['mitre_data_sources'] = item['x_mitre_data_sources']
+            if 'x_mitre_platforms' in item:
+                value['meta']['mitre_platforms'] = item['x_mitre_platforms']
+            # TODO add the other x_mitre elements dynamically
+
+            # relationships will be build separately afterwards
+            value['type'] = item['type']  # remove this before dump to json
+            # print(json.dumps(value, sort_keys=True, indent=2))
+
+            all_data_uuid[uuid] = value
+
+        except Exception as e:
+            print(json.dumps(item, sort_keys=True, indent=2))
+            import traceback
+            traceback.print_exc()
+
+    # process the 'relationship' type as we now know the existence of all ATT&CK uuids
+    for item in attack_data['objects']:
+        if item['type'] != 'relationship':
+            continue
+        # print(json.dumps(item, indent=2, sort_keys=True, ensure_ascii=False))
+
+        rel_type = item['relationship_type']
+        dest_uuid = re.findall(r'--([0-9a-f-]+)', item['target_ref']).pop()
+        source_uuid = re.findall(r'--([0-9a-f-]+)', item['source_ref']).pop()
+        tags = []
+
+        # add the relation in the defined way
+        rel_source = {
+            "dest-uuid": dest_uuid,
+            "tags": [
+                "estimative-language:likelihood-probability=\"almost-certain\""
+            ],
+            "type": rel_type
+        }
+        if 'related' not in all_data_uuid[source_uuid]:
+            all_data_uuid[source_uuid]['related'] = []
+        if rel_source not in all_data_uuid[source_uuid]['related']:
+            all_data_uuid[source_uuid]['related'].append(rel_source)
+
+        # LATER find the opposite word of "rel_type" and build the relation in the opposite direction
+
+# dump all_data to their respective file
+for t in types:
+    fname = os.path.join(misp_dir, 'clusters', 'mitre-{}.json'.format(t))
+    if not os.path.exists(fname):
+        exit("File {} does not exist, this is unexpected.".format(fname))
+    with open(fname) as f:
+        file_data = json.load(f)
+
+    file_data['values'] = []
+    for item in all_data_uuid.values():
+        # print(json.dumps(item, sort_keys=True, indent=2))
+        if item['type'] != t:
+            continue
+        item_2 = item.copy()
+        item_2.pop('type', None)
+        file_data['values'].append(item_2)
+
+    file_data['values'] = sorted(file_data['values'], key=lambda x: sorted(x['value']))  # FIXME the sort algo needs to be further improved
+    file_data['version'] += 1
+    with open(fname, 'w') as f:
+        json.dump(file_data, f, indent=2, sort_keys=True, ensure_ascii=False)
+        f.write('\n')  # only needed for the beauty and to be compliant with jq_all_the_things
+
+print("All done, please don't forget to ./validate_all.sh and ./jq_all_the_things.sh")