misp-modules/misp_modules/lib/joe_parser.py

574 lines
26 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
import json
from collections import defaultdict
from datetime import datetime
from pymisp import MISPAttribute, MISPEvent, MISPObject
from joe_mapping import (arch_type_mapping, domain_object_mapping,
dropped_file_mapping, dropped_hash_mapping, elf_object_mapping,
elf_section_flags_mapping, file_object_fields, file_object_mapping,
file_references_mapping, network_behavior_fields,
network_connection_object_mapping, pe_object_fields, pe_object_mapping,
pe_section_object_mapping, process_object_fields, protocols,
registry_references_mapping, regkey_object_mapping, signerinfo_object_mapping)
class JoeParser():
def __init__(self, config):
self.misp_event = MISPEvent()
self.references = defaultdict(list)
self.attributes = defaultdict(lambda: defaultdict(set))
self.process_references = {}
self.import_executable = config["import_executable"]
self.create_mitre_attack = config["mitre_attack"]
def parse_data(self, data):
self.data = data
if self.analysis_type() == "file":
self.parse_fileinfo()
else:
self.parse_url_analysis()
self.parse_system_behavior()
self.parse_network_behavior()
self.parse_screenshot()
self.parse_network_interactions()
self.parse_dropped_files()
if self.attributes:
self.handle_attributes()
if self.create_mitre_attack:
self.parse_mitre_attack()
def build_references(self):
for misp_object in self.misp_event.objects:
object_uuid = misp_object.uuid
if object_uuid in self.references:
for reference in self.references[object_uuid]:
misp_object.add_reference(**reference)
def handle_attributes(self):
for attribute_type, attribute in self.attributes.items():
for attribute_value, references in attribute.items():
attribute_uuid = self.create_attribute(attribute_type, attribute_value)
for reference in references:
source_uuid, relationship = reference
self.references[source_uuid].append(dict(referenced_uuid=attribute_uuid,
relationship_type=relationship))
def parse_dropped_files(self):
droppedinfo = self.data['droppedinfo']
if droppedinfo:
for droppedfile in droppedinfo['hash']:
file_object = MISPObject('file')
for key, mapping in dropped_file_mapping.items():
if droppedfile.get(key) is not None:
attribute = {'value': droppedfile[key], 'to_ids': False}
attribute.update(mapping)
file_object.add_attribute(**attribute)
if droppedfile['@malicious'] == 'true':
file_object.add_attribute(
**{
'type': 'text',
'object_relation': 'state',
'value': 'Malicious',
'to_ids': False
}
)
for h in droppedfile['value']:
hash_type = dropped_hash_mapping[h['@algo']]
file_object.add_attribute(
**{
'type': hash_type,
'object_relation': hash_type,
'value': h['$'],
'to_ids': False
}
)
self.misp_event.add_object(file_object)
reference_key = (int(droppedfile['@targetid']), droppedfile['@process'])
if reference_key in self.process_references:
self.references[self.process_references[reference_key]].append(
{
'referenced_uuid': file_object.uuid,
'relationship_type': 'drops'
}
)
def parse_mitre_attack(self):
mitreattack = self.data.get('mitreattack', {})
if mitreattack:
for tactic in mitreattack['tactic']:
if tactic.get('technique'):
for technique in tactic['technique']:
self.misp_event.add_tag(f'misp-galaxy:mitre-attack-pattern="{technique["name"]} - {technique["id"]}"')
def parse_network_behavior(self):
network = self.data['behavior']['network']
connections = defaultdict(lambda: defaultdict(set))
for protocol, layer in protocols.items():
if network.get(protocol):
for packet in network[protocol]['packet']:
timestamp = datetime.strptime(self.parse_timestamp(packet['timestamp']), '%b %d, %Y %H:%M:%S.%f')
connections[tuple(packet.get(field) for field in network_behavior_fields)][protocol].add(timestamp)
for connection, data in connections.items():
attributes = self.prefetch_attributes_data(connection)
if len(data.keys()) == len(set(protocols[protocol] for protocol in data.keys())):
network_connection_object = MISPObject('network-connection')
for attribute in attributes:
network_connection_object.add_attribute(**attribute)
network_connection_object.add_attribute(
**{
'type': 'datetime',
'object_relation': 'first-packet-seen',
'value': min(tuple(min(timestamp) for timestamp in data.values())),
'to_ids': False
}
)
for protocol in data.keys():
network_connection_object.add_attribute(
**{
'type': 'text',
'object_relation': f'layer{protocols[protocol]}-protocol',
'value': protocol,
'to_ids': False
}
)
self.misp_event.add_object(network_connection_object)
self.references[self.analysisinfo_uuid].append(dict(referenced_uuid=network_connection_object.uuid,
relationship_type='initiates'))
else:
for protocol, timestamps in data.items():
network_connection_object = MISPObject('network-connection')
for attribute in attributes:
network_connection_object.add_attribute(**attribute)
network_connection_object.add_attribute(
**{
'type': 'datetime',
'object_relation': 'first-packet-seen',
'value': min(timestamps),
'to_ids': False
}
)
network_connection_object.add_attribute(
**{
'type': 'text',
'object_relation': f'layer{protocols[protocol]}-protocol',
'value': protocol,
'to_ids': False
}
)
self.misp_event.add_object(network_connection_object)
self.references[self.analysisinfo_uuid].append(dict(referenced_uuid=network_connection_object.uuid,
relationship_type='initiates'))
def parse_screenshot(self):
if self.data['behavior'].get('screenshotdata', {}).get('interesting') is not None:
screenshotdata = self.data['behavior']['screenshotdata']['interesting']['$']
self.misp_event.add_attribute(
**{
'type': 'attachment',
'value': 'screenshot.jpg',
'data': screenshotdata,
'disable_correlation': True,
'to_ids': False
}
)
def parse_system_behavior(self):
if not 'system' in self.data['behavior']:
return
system = self.data['behavior']['system']
if system.get('processes'):
process_activities = {'fileactivities': self.parse_fileactivities,
'registryactivities': self.parse_registryactivities}
for process in system['processes']['process']:
general = process['general']
process_object = MISPObject('process')
for feature, relation in process_object_fields.items():
process_object.add_attribute(
**{
'type': 'text',
'object_relation': relation,
'value': general[feature],
'to_ids': False
}
)
start_time = datetime.strptime(f"{general['date']} {general['time']}", '%d/%m/%Y %H:%M:%S')
process_object.add_attribute(
**{
'type': 'datetime',
'object_relation': 'start-time',
'value': start_time,
'to_ids': False
}
)
self.misp_event.add_object(process_object)
for field, to_call in process_activities.items():
if process.get(field):
to_call(process_object.uuid, process[field])
self.references[self.analysisinfo_uuid].append(dict(referenced_uuid=process_object.uuid,
relationship_type='calls'))
self.process_references[(general['targetid'], general['path'])] = process_object.uuid
def parse_fileactivities(self, process_uuid, fileactivities):
for feature, files in fileactivities.items():
# ignore unknown features
if feature not in file_references_mapping:
continue
if files:
for call in files['call']:
self.attributes['filename'][call['path']].add((process_uuid, file_references_mapping[feature]))
def analysis_type(self):
generalinfo = self.data['generalinfo']
if generalinfo['target']['sample']:
return "file"
elif generalinfo['target']['url']:
return "url"
else:
raise Exception("Unknown analysis type")
def parse_url_analysis(self):
generalinfo = self.data["generalinfo"]
url_object = MISPObject("url")
self.analysisinfo_uuid = url_object.uuid
url_object.add_attribute(
**{
'type': 'url',
'object_relation': 'url',
'value': generalinfo["target"]["url"],
'to_ids': False
}
)
self.misp_event.add_object(url_object)
def parse_fileinfo(self):
fileinfo = self.data['fileinfo']
file_object = MISPObject('file')
self.analysisinfo_uuid = file_object.uuid
for field in file_object_fields:
file_object.add_attribute(
**{
'type': field,
'object_relation': field,
'value': fileinfo[field],
'to_ids': False
}
)
for field, mapping in file_object_mapping.items():
if fileinfo.get(field) is not None:
attribute = {'value': fileinfo[field], 'to_ids': False}
attribute.update(mapping)
file_object.add_attribute(**attribute)
arch = self.data['generalinfo']['arch']
if self.import_executable and arch in arch_type_mapping:
to_call = arch_type_mapping[arch]
2019-07-24 12:21:58 +02:00
getattr(self, to_call)(fileinfo, file_object)
else:
self.misp_event.add_object(file_object)
2019-07-24 12:21:58 +02:00
def parse_apk(self, fileinfo, file_object):
apkinfo = fileinfo['apk']
self.misp_event.add_object(file_object)
permission_lists = defaultdict(list)
for permission in apkinfo['requiredpermissions']['permission']:
permission = permission['@name'].split('.')
permission_lists[' '.join(permission[:-1])].append(permission[-1])
attribute_type = 'text'
for comment, permissions in permission_lists.items():
permission_object = MISPObject('android-permission')
permission_object.add_attribute(
**{
'type': attribute_type,
'object_relation': 'comment',
'value': comment,
'to_ids': False
}
)
for permission in permissions:
permission_object.add_attribute(
**{
'type': attribute_type,
'object_relation': 'permission',
'value': permission,
'to_ids': False
}
)
self.misp_event.add_object(permission_object)
self.references[file_object.uuid].append(dict(referenced_uuid=permission_object.uuid,
relationship_type='grants'))
def parse_elf(self, fileinfo, file_object):
elfinfo = fileinfo['elf']
self.misp_event.add_object(file_object)
attribute_type = 'text'
relationship = 'includes'
size = 'size-in-bytes'
for fileinfo in elfinfo['file']:
elf_object = MISPObject('elf')
self.references[file_object.uuid].append(dict(referenced_uuid=elf_object.uuid,
relationship_type=relationship))
elf = fileinfo['main'][0]['header'][0]
if elf.get('type'):
# Haven't seen anything but EXEC yet in the files I tested
attribute_value = "EXECUTABLE" if elf['type'] == "EXEC (Executable file)" else elf['type']
elf_object.add_attribute(
**{
'type': attribute_type,
'object_relation': 'type',
'value': attribute_value,
'to_ids': False
}
)
for feature, relation in elf_object_mapping.items():
if elf.get(feature):
elf_object.add_attribute(
**{
'type': attribute_type,
'object_relation': relation,
'value': elf[feature],
'to_ids': False
}
)
sections_number = len(fileinfo['sections']['section'])
elf_object.add_attribute(
**{
'type': 'counter',
'object_relation': 'number-sections',
'value': sections_number,
'to_ids': False
}
)
self.misp_event.add_object(elf_object)
for section in fileinfo['sections']['section']:
section_object = MISPObject('elf-section')
for feature in ('name', 'type'):
if section.get(feature):
section_object.add_attribute(
**{
'type': attribute_type,
'object_relation': feature,
'value': section[feature],
'to_ids': False
}
)
if section.get('size'):
section_object.add_attribute(
**{
'type': size,
'object_relation': size,
'value': int(section['size'], 16),
'to_ids': False
}
)
for flag in section['flagsdesc']:
try:
attribute_value = elf_section_flags_mapping[flag]
section_object.add_attribute(
**{
'type': attribute_type,
'object_relation': 'flag',
'value': attribute_value,
'to_ids': False
}
)
except KeyError:
print(f'Unknown elf section flag: {flag}')
continue
self.misp_event.add_object(section_object)
self.references[elf_object.uuid].append(dict(referenced_uuid=section_object.uuid,
relationship_type=relationship))
2019-07-24 12:21:58 +02:00
def parse_pe(self, fileinfo, file_object):
try:
peinfo = fileinfo['pe']
except KeyError:
self.misp_event.add_object(file_object)
return
pe_object = MISPObject('pe')
relationship = 'includes'
file_object.add_reference(pe_object.uuid, relationship)
self.misp_event.add_object(file_object)
for field, mapping in pe_object_fields.items():
if peinfo.get(field) is not None:
attribute = {'value': peinfo[field], 'to_ids': False}
attribute.update(mapping)
pe_object.add_attribute(**attribute)
pe_object.add_attribute(
**{
'type': 'datetime',
'object_relation': 'compilation-timestamp',
'value': int(peinfo['timestamp'].split()[0], 16),
'to_ids': False
}
)
program_name = fileinfo['filename']
if peinfo['versions']:
for feature in peinfo['versions']['version']:
name = feature['name']
if name == 'InternalName':
program_name = feature['value']
if name in pe_object_mapping:
pe_object.add_attribute(
**{
'type': 'text',
'object_relation': pe_object_mapping[name],
'value': feature['value'],
'to_ids': False
}
)
sections_number = len(peinfo['sections']['section'])
pe_object.add_attribute(
**{
'type': 'counter',
'object_relation': 'number-sections',
'value': sections_number,
'to_ids': False
}
)
signatureinfo = peinfo['signature']
if signatureinfo['signed']:
signerinfo_object = MISPObject('authenticode-signerinfo')
pe_object.add_reference(signerinfo_object.uuid, 'signed-by')
self.misp_event.add_object(pe_object)
signerinfo_object.add_attribute(
**{
'type': 'text',
'object_relation': 'program-name',
'value': program_name,
'to_ids': False
}
)
for feature, mapping in signerinfo_object_mapping.items():
if signatureinfo.get(feature) is not None:
attribute = {'value': signatureinfo[feature], 'to_ids': False}
attribute.update(mapping)
signerinfo_object.add_attribute(**attribute)
self.misp_event.add_object(signerinfo_object)
else:
self.misp_event.add_object(pe_object)
for section in peinfo['sections']['section']:
section_object = self.parse_pe_section(section)
self.references[pe_object.uuid].append(dict(referenced_uuid=section_object.uuid,
relationship_type=relationship))
self.misp_event.add_object(section_object)
def parse_pe_section(self, section):
section_object = MISPObject('pe-section')
for feature, mapping in pe_section_object_mapping.items():
if section.get(feature) is not None:
attribute = {'value': section[feature], 'to_ids': False}
attribute.update(mapping)
section_object.add_attribute(**attribute)
return section_object
def parse_network_interactions(self):
domaininfo = self.data['domaininfo']
if domaininfo:
for domain in domaininfo['domain']:
if domain['@ip'] != 'unknown':
domain_object = MISPObject('domain-ip')
for key, mapping in domain_object_mapping.items():
if domain.get(key) is not None:
attribute = {'value': domain[key], 'to_ids': False}
attribute.update(mapping)
domain_object.add_attribute(**attribute)
self.misp_event.add_object(domain_object)
reference = dict(referenced_uuid=domain_object.uuid, relationship_type='contacts')
self.add_process_reference(domain['@targetid'], domain['@currentpath'], reference)
else:
attribute = MISPAttribute()
attribute.from_dict(**{'type': 'domain', 'value': domain['@name'], 'to_ids': False})
self.misp_event.add_attribute(**attribute)
reference = dict(referenced_uuid=attribute.uuid, relationship_type='contacts')
self.add_process_reference(domain['@targetid'], domain['@currentpath'], reference)
ipinfo = self.data['ipinfo']
if ipinfo:
for ip in ipinfo['ip']:
attribute = MISPAttribute()
attribute.from_dict(**{'type': 'ip-dst', 'value': ip['@ip'], 'to_ids': False})
self.misp_event.add_attribute(**attribute)
reference = dict(referenced_uuid=attribute.uuid, relationship_type='contacts')
self.add_process_reference(ip['@targetid'], ip['@currentpath'], reference)
urlinfo = self.data['urlinfo']
if urlinfo:
for url in urlinfo['url']:
target_id = int(url['@targetid'])
current_path = url['@currentpath']
attribute = MISPAttribute()
attribute_dict = {'type': 'url', 'value': url['@name'], 'to_ids': False}
if target_id != -1 and current_path != 'unknown':
self.references[self.process_references[(target_id, current_path)]].append({
'referenced_uuid': attribute.uuid,
'relationship_type': 'contacts'
})
else:
attribute_dict['comment'] = 'From Memory - Enriched via the joe_import module'
attribute.from_dict(**attribute_dict)
self.misp_event.add_attribute(**attribute)
def parse_registryactivities(self, process_uuid, registryactivities):
if registryactivities['keyCreated']:
for call in registryactivities['keyCreated']['call']:
self.attributes['regkey'][call['path']].add((process_uuid, 'creates'))
for feature, relationship in registry_references_mapping.items():
if registryactivities[feature]:
for call in registryactivities[feature]['call']:
registry_key = MISPObject('registry-key')
for field, mapping in regkey_object_mapping.items():
if call.get(field) is not None:
attribute = {'value': call[field], 'to_ids': False}
attribute.update(mapping)
registry_key.add_attribute(**attribute)
registry_key.add_attribute(
**{
'type': 'text',
'object_relation': 'data-type',
'value': f"REG_{call['type'].upper()}",
'to_ids': False
}
)
self.misp_event.add_object(registry_key)
self.references[process_uuid].append(dict(referenced_uuid=registry_key.uuid,
relationship_type=relationship))
def add_process_reference(self, target, currentpath, reference):
try:
self.references[self.process_references[(int(target), currentpath)]].append(reference)
except KeyError:
self.references[self.analysisinfo_uuid].append(reference)
def create_attribute(self, attribute_type, attribute_value):
attribute = MISPAttribute()
attribute.from_dict(**{'type': attribute_type, 'value': attribute_value, 'to_ids': False})
self.misp_event.add_attribute(**attribute)
return attribute.uuid
def finalize_results(self):
if self.references:
self.build_references()
event = json.loads(self.misp_event.to_json())
self.results = {key: event[key] for key in ('Attribute', 'Object', 'Tag') if (key in event and event[key])}
@staticmethod
def parse_timestamp(timestamp):
timestamp = timestamp.split(':')
timestamp[-1] = str(round(float(timestamp[-1].split(' ')[0]), 6))
return ':'.join(timestamp)
@staticmethod
def prefetch_attributes_data(connection):
attributes = []
for field, value in zip(network_behavior_fields, connection):
attribute = {'value': value, 'to_ids': False}
attribute.update(network_connection_object_mapping[field])
attributes.append(attribute)
return attributes