diff --git a/README.md b/README.md index ecce406..bc2056a 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,8 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ * [intel471](misp_modules/modules/expansion/intel471.py) - an expansion module to get info from [Intel471](https://intel471.com). * [IPASN](misp_modules/modules/expansion/ipasn.py) - a hover and expansion to get the BGP ASN of an IP address. * [iprep](misp_modules/modules/expansion/iprep.py) - an expansion module to get IP reputation from packetmail.net. -* [Joe Sandbox](misp_modules/modules/expansion/joesandbox_submit.py) - Submit files and URLs to Joe Sandbox. +* [Joe Sandbox submit](misp_modules/modules/expansion/joesandbox_submit.py) - Submit files and URLs to Joe Sandbox. +* [Joe Sandbox query](misp_modules/modules/expansion/joesandbox_query.py) - Query Joe Sandbox with the link of an analysis and get the parsed data. * [macaddress.io](misp_modules/modules/expansion/macaddress_io.py) - a hover module to retrieve vendor details and other information regarding a given MAC address or an OUI from [MAC address Vendor Lookup](https://macaddress.io). See [integration tutorial here](https://macaddress.io/integrations/MISP-module). * [macvendors](misp_modules/modules/expansion/macvendors.py) - a hover module to retrieve mac vendor information. * [ocr-enrich](misp_modules/modules/expansion/ocr-enrich.py) - an enrichment module to get OCRized data from images into MISP. @@ -63,6 +64,7 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ * [STIX2 pattern syntax validator](misp_modules/modules/expansion/stix2_pattern_syntax_validator.py) - a module to check a STIX2 pattern syntax. * [ThreatCrowd](misp_modules/modules/expansion/threatcrowd.py) - an expansion module for [ThreatCrowd](https://www.threatcrowd.org/). * [threatminer](misp_modules/modules/expansion/threatminer.py) - an expansion module to expand from [ThreatMiner](https://www.threatminer.org/). +* [urlhaus](misp_modules/modules/expansion/urlhaus.py) - Query urlhaus to get additional data about a domain, hash, hostname, ip or url. * [urlscan](misp_modules/modules/expansion/urlscan.py) - an expansion module to query [urlscan.io](https://urlscan.io). * [virustotal](misp_modules/modules/expansion/virustotal.py) - an expansion module to pull known resolutions and malware samples related with an IP/Domain from virusTotal (this modules require a VirusTotal private API key) * [VMray](misp_modules/modules/expansion/vmray_submit.py) - a module to submit a sample to VMray. @@ -92,7 +94,8 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ * [CSV import](misp_modules/modules/import_mod/csvimport.py) Customizable CSV import module. * [Cuckoo JSON](misp_modules/modules/import_mod/cuckooimport.py) Cuckoo JSON import. * [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata. -* [GoAML import](misp_modules/modules/import_mod/) Module to import [GoAML](http://goaml.unodc.org/goaml/en/index.html) XML format. +* [GoAML import](misp_modules/modules/import_mod/goamlimport.py) Module to import [GoAML](http://goaml.unodc.org/goaml/en/index.html) XML format. +* [Joe Sandbox import](misp_modules/modules/import_mod/joe_import.py) Parse data from a Joe Sandbox json report. * [OCR](misp_modules/modules/import_mod/ocr.py) Optical Character Recognition (OCR) module for MISP to import attributes from images, scan or faxes. * [OpenIOC](misp_modules/modules/import_mod/openiocimport.py) OpenIOC import based on PyMISP library. * [ThreatAnalyzer](misp_modules/modules/import_mod/threatanalyzer_import.py) - An import module to process ThreatAnalyzer archive.zip/analysis.json sandbox exports. diff --git a/misp_modules/lib/__init__.py b/misp_modules/lib/__init__.py new file mode 100644 index 0000000..0dbceb8 --- /dev/null +++ b/misp_modules/lib/__init__.py @@ -0,0 +1 @@ +all = ['joe_parser'] diff --git a/misp_modules/lib/joe_parser.py b/misp_modules/lib/joe_parser.py new file mode 100644 index 0000000..83f1fa0 --- /dev/null +++ b/misp_modules/lib/joe_parser.py @@ -0,0 +1,334 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +from datetime import datetime +from pymisp import MISPAttribute, MISPEvent, MISPObject +import json + + +domain_object_mapping = {'@ip': ('ip-dst', 'ip'), '@name': ('domain', 'domain')} +dropped_file_mapping = {'@entropy': ('float', 'entropy'), + '@file': ('filename', 'filename'), + '@size': ('size-in-bytes', 'size-in-bytes'), + '@type': ('mime-type', 'mimetype')} +dropped_hash_mapping = {'MD5': 'md5', 'SHA': 'sha1', 'SHA-256': 'sha256', 'SHA-512': 'sha512'} +file_object_fields = ['filename', 'md5', 'sha1', 'sha256', 'sha512', 'ssdeep'] +file_object_mapping = {'entropy': ('float', 'entropy'), + 'filesize': ('size-in-bytes', 'size-in-bytes'), + 'filetype': ('mime-type', 'mimetype')} +file_references_mapping = {'fileCreated': 'creates', 'fileDeleted': 'deletes', + 'fileMoved': 'moves', 'fileRead': 'reads', 'fileWritten': 'writes'} +network_behavior_fields = ('srcip', 'dstip', 'srcport', 'dstport') +network_connection_object_mapping = {'srcip': ('ip-src', 'ip-src'), 'dstip': ('ip-dst', 'ip-dst'), + 'srcport': ('port', 'src-port'), 'dstport': ('port', 'dst-port')} +pe_object_fields = {'entrypoint': ('text', 'entrypoint-address'), + 'imphash': ('imphash', 'imphash')} +pe_object_mapping = {'CompanyName': 'company-name', 'FileDescription': 'file-description', + 'FileVersion': 'file-version', 'InternalName': 'internal-filename', + 'LegalCopyright': 'legal-copyright', 'OriginalFilename': 'original-filename', + 'ProductName': 'product-filename', 'ProductVersion': 'product-version', + 'Translation': 'lang-id'} +process_object_fields = {'cmdline': 'command-line', 'name': 'name', + 'parentpid': 'parent-pid', 'pid': 'pid', + 'path': 'current-directory'} +protocols = {'tcp': 4, 'udp': 4, 'icmp': 3, + 'http': 7, 'https': 7, 'ftp': 7} +section_object_mapping = {'characteristics': ('text', 'characteristic'), + 'entropy': ('float', 'entropy'), + 'name': ('text', 'name'), 'rawaddr': ('hex', 'offset'), + 'rawsize': ('size-in-bytes', 'size-in-bytes'), + 'virtaddr': ('hex', 'virtual_address'), + 'virtsize': ('size-in-bytes', 'virtual_size')} +registry_references_mapping = {'keyValueCreated': 'creates', 'keyValueModified': 'modifies'} +regkey_object_mapping = {'name': ('text', 'name'), 'newdata': ('text', 'data'), + 'path': ('regkey', 'key')} +signerinfo_object_mapping = {'sigissuer': ('text', 'issuer'), + 'version': ('text', 'version')} + + +class JoeParser(): + def __init__(self): + self.misp_event = MISPEvent() + self.references = defaultdict(list) + self.attributes = defaultdict(lambda: defaultdict(set)) + self.process_references = {} + + def parse_data(self, data): + self.data = data + if self.analysis_type() == "file": + self.parse_fileinfo() + else: + self.parse_url_analysis() + + self.parse_system_behavior() + self.parse_network_behavior() + self.parse_network_interactions() + self.parse_dropped_files() + + if self.attributes: + self.handle_attributes() + self.parse_mitre_attack() + + def build_references(self): + for misp_object in self.misp_event.objects: + object_uuid = misp_object.uuid + if object_uuid in self.references: + for reference in self.references[object_uuid]: + misp_object.add_reference(reference['idref'], reference['relationship']) + + def handle_attributes(self): + for attribute_type, attribute in self.attributes.items(): + for attribute_value, references in attribute.items(): + attribute_uuid = self.create_attribute(attribute_type, attribute_value) + for reference in references: + source_uuid, relationship = reference + self.references[source_uuid].append({'idref': attribute_uuid, 'relationship': relationship}) + + def parse_dropped_files(self): + droppedinfo = self.data['droppedinfo'] + if droppedinfo: + for droppedfile in droppedinfo['hash']: + file_object = MISPObject('file') + for key, mapping in dropped_file_mapping.items(): + attribute_type, object_relation = mapping + file_object.add_attribute(object_relation, **{'type': attribute_type, 'value': droppedfile[key]}) + if droppedfile['@malicious'] == 'true': + file_object.add_attribute('state', **{'type': 'text', 'value': 'Malicious'}) + for h in droppedfile['value']: + hash_type = dropped_hash_mapping[h['@algo']] + file_object.add_attribute(hash_type, **{'type': hash_type, 'value': h['$']}) + self.misp_event.add_object(**file_object) + self.references[self.process_references[(int(droppedfile['@targetid']), droppedfile['@process'])]].append({ + 'idref': file_object.uuid, + 'relationship': 'drops' + }) + + def parse_mitre_attack(self): + mitreattack = self.data['mitreattack'] + if mitreattack: + for tactic in mitreattack['tactic']: + if tactic.get('technique'): + for technique in tactic['technique']: + self.misp_event.add_tag('misp-galaxy:mitre-attack-pattern="{} - {}"'.format(technique['name'], technique['id'])) + + def parse_network_behavior(self): + network = self.data['behavior']['network'] + connections = defaultdict(lambda: defaultdict(set)) + for protocol, layer in protocols.items(): + if network.get(protocol): + for packet in network[protocol]['packet']: + timestamp = datetime.strptime(self.parse_timestamp(packet['timestamp']), '%b %d, %Y %H:%M:%S.%f') + connections[tuple(packet[field] for field in network_behavior_fields)][protocol].add(timestamp) + for connection, data in connections.items(): + attributes = self.prefetch_attributes_data(connection) + if len(data.keys()) == len(set(protocols[protocol] for protocol in data.keys())): + network_connection_object = MISPObject('network-connection') + for object_relation, attribute in attributes.items(): + network_connection_object.add_attribute(object_relation, **attribute) + network_connection_object.add_attribute('first-packet-seen', + **{'type': 'datetime', 'value': min(tuple(min(timestamp) for timestamp in data.values()))}) + for protocol in data.keys(): + network_connection_object.add_attribute('layer{}-protocol'.format(protocols[protocol]), **{'type': 'text', 'value': protocol}) + self.misp_event.add_object(**network_connection_object) + self.references[self.analysisinfo_uuid].append({'idref': network_connection_object.uuid, 'relationship': 'initiates'}) + else: + for protocol, timestamps in data.items(): + network_connection_object = MISPObject('network-connection') + for object_relation, attribute in attributes.items(): + network_connection_object.add_attribute(object_relation, **attribute) + network_connection_object.add_attribute('first-packet-seen', **{'type': 'datetime', 'value': min(timestamps)}) + network_connection_object.add_attribute('layer{}-protocol'.format(protocols[protocol]), **{'type': 'text', 'value': protocol}) + self.misp_event.add_object(**network_connection_object) + self.references[self.analysisinfo_uuid].append({'idref': network_connection_object.uuid, 'relationship': 'initiates'}) + + def parse_system_behavior(self): + system = self.data['behavior']['system'] + if system.get('processes'): + process_activities = {'fileactivities': self.parse_fileactivities, + 'registryactivities': self.parse_registryactivities} + for process in system['processes']['process']: + general = process['general'] + process_object = MISPObject('process') + for feature, relation in process_object_fields.items(): + process_object.add_attribute(relation, **{'type': 'text', 'value': general[feature]}) + start_time = datetime.strptime('{} {}'.format(general['date'], general['time']), '%d/%m/%Y %H:%M:%S') + process_object.add_attribute('start-time', **{'type': 'datetime', 'value': start_time}) + self.misp_event.add_object(**process_object) + for field, to_call in process_activities.items(): + to_call(process_object.uuid, process[field]) + self.references[self.analysisinfo_uuid].append({'idref': process_object.uuid, 'relationship': 'calls'}) + self.process_references[(general['targetid'], general['path'])] = process_object.uuid + + def parse_fileactivities(self, process_uuid, fileactivities): + for feature, files in fileactivities.items(): + # ignore unknown features + if feature not in file_references_mapping: + continue + + if files: + for call in files['call']: + self.attributes['filename'][call['path']].add((process_uuid, file_references_mapping[feature])) + + def analysis_type(self): + generalinfo = self.data['generalinfo'] + + if generalinfo['target']['sample']: + return "file" + elif generalinfo['target']['url']: + return "url" + else: + raise Exception("Unknown analysis type") + + def parse_url_analysis(self): + generalinfo = self.data["generalinfo"] + + url_object = MISPObject("url") + self.analysisinfo_uuid = url_object.uuid + + url_object.add_attribute("url", generalinfo["target"]["url"]) + self.misp_event.add_object(**url_object) + + def parse_fileinfo(self): + fileinfo = self.data['fileinfo'] + + file_object = MISPObject('file') + self.analysisinfo_uuid = file_object.uuid + + for field in file_object_fields: + file_object.add_attribute(field, **{'type': field, 'value': fileinfo[field]}) + for field, mapping in file_object_mapping.items(): + attribute_type, object_relation = mapping + file_object.add_attribute(object_relation, **{'type': attribute_type, 'value': fileinfo[field]}) + if not fileinfo.get('pe'): + self.misp_event.add_object(**file_object) + return + peinfo = fileinfo['pe'] + pe_object = MISPObject('pe') + file_object.add_reference(pe_object.uuid, 'included-in') + self.misp_event.add_object(**file_object) + for field, mapping in pe_object_fields.items(): + attribute_type, object_relation = mapping + pe_object.add_attribute(object_relation, **{'type': attribute_type, 'value': peinfo[field]}) + pe_object.add_attribute('compilation-timestamp', **{'type': 'datetime', 'value': int(peinfo['timestamp'].split()[0], 16)}) + program_name = fileinfo['filename'] + if peinfo['versions']: + for feature in peinfo['versions']['version']: + name = feature['name'] + if name == 'InternalName': + program_name = feature['value'] + if name in pe_object_mapping: + pe_object.add_attribute(pe_object_mapping[name], **{'type': 'text', 'value': feature['value']}) + sections_number = len(peinfo['sections']['section']) + pe_object.add_attribute('number-sections', **{'type': 'counter', 'value': sections_number}) + signatureinfo = peinfo['signature'] + if signatureinfo['signed']: + signerinfo_object = MISPObject('authenticode-signerinfo') + pe_object.add_reference(signerinfo_object.uuid, 'signed-by') + self.misp_event.add_object(**pe_object) + signerinfo_object.add_attribute('program-name', **{'type': 'text', 'value': program_name}) + for feature, mapping in signerinfo_object_mapping.items(): + attribute_type, object_relation = mapping + signerinfo_object.add_attribute(object_relation, **{'type': attribute_type, 'value': signatureinfo[feature]}) + self.misp_event.add_object(**signerinfo_object) + else: + self.misp_event.add_object(**pe_object) + for section in peinfo['sections']['section']: + section_object = self.parse_pe_section(section) + self.references[pe_object.uuid].append({'idref': section_object.uuid, 'relationship': 'included-in'}) + self.misp_event.add_object(**section_object) + + def parse_network_interactions(self): + domaininfo = self.data['domaininfo'] + if domaininfo: + for domain in domaininfo['domain']: + if domain['@ip'] != 'unknown': + domain_object = MISPObject('domain-ip') + for key, mapping in domain_object_mapping.items(): + attribute_type, object_relation = mapping + domain_object.add_attribute(object_relation, + **{'type': attribute_type, 'value': domain[key]}) + self.misp_event.add_object(**domain_object) + reference = {'idref': domain_object.uuid, 'relationship': 'contacts'} + self.add_process_reference(domain['@targetid'], domain['@currentpath'], reference) + else: + attribute = MISPAttribute() + attribute.from_dict(**{'type': 'domain', 'value': domain['@name']}) + reference = {'idref': attribute.uuid, 'relationship': 'contacts'} + self.add_process_reference(domain['@targetid'], domain['@currentpath'], reference) + ipinfo = self.data['ipinfo'] + if ipinfo: + for ip in ipinfo['ip']: + attribute = MISPAttribute() + attribute.from_dict(**{'type': 'ip-dst', 'value': ip['@ip']}) + self.misp_event.add_attribute(**attribute) + reference = {'idref': attribute.uuid, 'relationship': 'contacts'} + self.add_process_reference(ip['@targetid'], ip['@currentpath'], reference) + urlinfo = self.data['urlinfo'] + if urlinfo: + for url in urlinfo['url']: + target_id = int(url['@targetid']) + current_path = url['@currentpath'] + attribute = MISPAttribute() + attribute_dict = {'type': 'url', 'value': url['@name']} + if target_id != -1 and current_path != 'unknown': + self.references[self.process_references[(target_id, current_path)]].append({ + 'idref': attribute.uuid, + 'relationship': 'contacts' + }) + else: + attribute_dict['comment'] = 'From Memory - Enriched via the joe_import module' + attribute.from_dict(**attribute_dict) + self.misp_event.add_attribute(**attribute) + + def parse_pe_section(self, section): + section_object = MISPObject('pe-section') + for feature, mapping in section_object_mapping.items(): + attribute_type, object_relation = mapping + section_object.add_attribute(object_relation, **{'type': attribute_type, 'value': section[feature]}) + return section_object + + def parse_registryactivities(self, process_uuid, registryactivities): + if registryactivities['keyCreated']: + for call in registryactivities['keyCreated']['call']: + self.attributes['regkey'][call['path']].add((process_uuid, 'creates')) + for feature, relationship_type in registry_references_mapping.items(): + if registryactivities[feature]: + for call in registryactivities[feature]['call']: + registry_key = MISPObject('registry-key') + for field, mapping in regkey_object_mapping.items(): + attribute_type, object_relation = mapping + registry_key.add_attribute(object_relation, **{'type': attribute_type, 'value': call[field]}) + registry_key.add_attribute('data-type', **{'type': 'text', 'value': 'REG_{}'.format(call['type'].upper())}) + self.misp_event.add_object(**registry_key) + self.references[process_uuid].append({'idref': registry_key.uuid, 'relationship': relationship_type}) + + def add_process_reference(self, target, currentpath, reference): + try: + self.references[self.process_references[(int(target), currentpath)]].append(reference) + except KeyError: + self.references[self.analysisinfo_uuid].append(reference) + + def create_attribute(self, attribute_type, attribute_value): + attribute = MISPAttribute() + attribute.from_dict(**{'type': attribute_type, 'value': attribute_value}) + self.misp_event.add_attribute(**attribute) + return attribute.uuid + + def finalize_results(self): + if self.references: + self.build_references() + event = json.loads(self.misp_event.to_json())['Event'] + self.results = {key: event[key] for key in ('Attribute', 'Object', 'Tag', 'Galaxy') if (key in event and event[key])} + + @staticmethod + def parse_timestamp(timestamp): + timestamp = timestamp.split(':') + timestamp[-1] = str(round(float(timestamp[-1].split(' ')[0]), 6)) + return ':'.join(timestamp) + + @staticmethod + def prefetch_attributes_data(connection): + attributes = {} + for field, value in zip(network_behavior_fields, connection): + attribute_type, object_relation = network_connection_object_mapping[field] + attributes[object_relation] = {'type': attribute_type, 'value': value} + return attributes diff --git a/misp_modules/modules/expansion/__init__.py b/misp_modules/modules/expansion/__init__.py index e5d17c1..acf49f2 100644 --- a/misp_modules/modules/expansion/__init__.py +++ b/misp_modules/modules/expansion/__init__.py @@ -1,4 +1,7 @@ from . import _vmray # noqa +import os +import sys +sys.path.append('{}/lib'.format('/'.join((os.path.realpath(__file__)).split('/')[:-3]))) __all__ = ['cuckoo_submit', 'vmray_submit', 'bgpranking', 'circl_passivedns', 'circl_passivessl', 'countrycode', 'cve', 'dns', 'btc_steroids', 'domaintools', 'eupi', @@ -10,4 +13,4 @@ __all__ = ['cuckoo_submit', 'vmray_submit', 'bgpranking', 'circl_passivedns', 'c 'sigma_queries', 'dbl_spamhaus', 'vulners', 'yara_query', 'macaddress_io', 'intel471', 'backscatter_io', 'btc_scam_check', 'hibp', 'greynoise', 'macvendors', 'qrcode', 'ocr-enrich', 'pdf-enrich', 'docx-enrich', 'xlsx-enrich', 'pptx-enrich', - 'ods-enrich', 'odt-enrich', 'joesandbox_submit'] + 'ods-enrich', 'odt-enrich', 'joesandbox_submit', 'joesandbox_query', 'urlhaus'] diff --git a/misp_modules/modules/expansion/joesandbox_query.py b/misp_modules/modules/expansion/joesandbox_query.py new file mode 100644 index 0000000..dce63ea --- /dev/null +++ b/misp_modules/modules/expansion/joesandbox_query.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +import jbxapi +import json +from joe_parser import JoeParser + +misperrors = {'error': 'Error'} +mispattributes = {'input': ['link'], 'format': 'misp_standard'} + +moduleinfo = {'version': '0.1', 'author': 'Christian Studer', + 'description': 'Query Joe Sandbox API with a report URL to get the parsed data.', + 'module-type': ['expansion']} +moduleconfig = ['apiurl', 'apikey'] + + +def handler(q=False): + if q is False: + return False + request = json.loads(q) + apiurl = request['config'].get('apiurl') or 'https://jbxcloud.joesecurity.org/api' + apikey = request['config'].get('apikey') + if not apikey: + return {'error': 'No API key provided'} + + url = request['attribute']['value'] + if "/submissions/" not in url: + return {'error': "The URL does not point to a Joe Sandbox analysis."} + + submission_id = url.split('/')[-1] # The URL has the format https://example.net/submissions/12345 + joe = jbxapi.JoeSandbox(apiurl=apiurl, apikey=apikey, user_agent='MISP joesandbox_query') + + try: + joe_info = joe.submission_info(submission_id) + except jbxapi.ApiError as e: + return {'error': str(e)} + + if joe_info["status"] != "finished": + return {'error': "The analysis has not finished yet."} + + if joe_info['most_relevant_analysis'] is None: + return {'error': "No analysis belongs to this submission."} + + analysis_webid = joe_info['most_relevant_analysis']['webid'] + + joe_parser = JoeParser() + joe_data = json.loads(joe.analysis_download(analysis_webid, 'jsonfixed')[1]) + joe_parser.parse_data(joe_data['analysis']) + joe_parser.finalize_results() + + return {'results': joe_parser.results} + + +def introspection(): + return mispattributes + + +def version(): + moduleinfo['config'] = moduleconfig + return moduleinfo diff --git a/misp_modules/modules/expansion/urlhaus.py b/misp_modules/modules/expansion/urlhaus.py new file mode 100644 index 0000000..12893b9 --- /dev/null +++ b/misp_modules/modules/expansion/urlhaus.py @@ -0,0 +1,133 @@ +from pymisp import MISPAttribute, MISPEvent, MISPObject +import json +import requests + +misperrors = {'error': 'Error'} +mispattributes = {'input': ['domain', 'hostname', 'ip-src', 'ip-dst', 'md5', 'sha256', 'url'], + 'output': ['url', 'filename', 'md5', 'sha256'], + 'format': 'misp_standard'} +moduleinfo = {'version': '0.1', 'author': 'Christian Studer', + 'description': 'Query of the URLhaus API to get additional information about some attributes.', + 'module-type': ['expansion', 'hover']} +moduleconfig = [] + +file_keys = ('filename', 'response_size', 'response_md5', 'response_sha256') +file_relations = ('filename', 'size-in-bytes', 'md5', 'sha256') +vt_keys = ('result', 'link') +vt_types = ('text', 'link') +vt_relations = ('detection-ratio', 'permalink') + + +class URLhaus(): + def __init__(self): + super(URLhaus, self).__init__() + self.misp_event = MISPEvent() + + @staticmethod + def _create_vt_object(virustotal): + vt_object = MISPObject('virustotal-report') + for key, vt_type, relation in zip(vt_keys, vt_types, vt_relations): + vt_object.add_attribute(relation, **{'type': vt_type, 'value': virustotal[key]}) + return vt_object + + def get_result(self): + event = json.loads(self.misp_event.to_json())['Event'] + results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} + return {'results': results} + + +class HostQuery(URLhaus): + def __init__(self, attribute): + super(HostQuery, self).__init__() + self.attribute = MISPAttribute() + self.attribute.from_dict(**attribute) + self.url = 'https://urlhaus-api.abuse.ch/v1/host/' + + def query_api(self): + response = requests.post(self.url, data={'host': self.attribute.value}).json() + if 'urls' in response and response['urls']: + for url in response['urls']: + self.misp_event.add_attribute(type='url', value=url['url']) + + +class PayloadQuery(URLhaus): + def __init__(self, attribute): + super(PayloadQuery, self).__init__() + self.attribute = MISPAttribute() + self.attribute.from_dict(**attribute) + self.url = 'https://urlhaus-api.abuse.ch/v1/payload/' + + def query_api(self): + hash_type = self.attribute.type + file_object = MISPObject('file') + if self.attribute.event_id != '0': + file_object.id = self.attribute.object_id + response = requests.post(self.url, data={'{}_hash'.format(hash_type): self.attribute.value}).json() + other_hash_type = 'md5' if hash_type == 'sha256' else 'sha256' + for key, relation in zip(('{}_hash'.format(other_hash_type), 'file_size'), (other_hash_type, 'size-in-bytes')): + if response[key]: + file_object.add_attribute(relation, **{'type': relation, 'value': response[key]}) + if response['virustotal']: + vt_object = self._create_vt_object(response['virustotal']) + file_object.add_reference(vt_object.uuid, 'analyzed-with') + self.misp_event.add_object(**vt_object) + _filename_ = 'filename' + for url in response['urls']: + attribute = MISPAttribute() + attribute.from_dict(type='url', value=url['url']) + self.misp_event.add_attribute(**attribute) + file_object.add_reference(attribute.uuid, 'retrieved-from') + if url[_filename_]: + file_object.add_attribute(_filename_, **{'type': _filename_, 'value': url[_filename_]}) + self.misp_event.add_object(**file_object) + + +class UrlQuery(URLhaus): + def __init__(self, attribute): + super(UrlQuery, self).__init__() + self.attribute = MISPAttribute() + self.attribute.from_dict(**attribute) + self.url = 'https://urlhaus-api.abuse.ch/v1/url/' + + @staticmethod + def _create_file_object(payload): + file_object = MISPObject('file') + for key, relation in zip(file_keys, file_relations): + if payload[key]: + file_object.add_attribute(relation, **{'type': relation, 'value': payload[key]}) + return file_object + + def query_api(self): + response = requests.post(self.url, data={'url': self.attribute.value}).json() + if 'payloads' in response and response['payloads']: + for payload in response['payloads']: + file_object = self._create_file_object(payload) + if payload['virustotal']: + vt_object = self._create_vt_object(payload['virustotal']) + file_object.add_reference(vt_object.uuid, 'analyzed-with') + self.misp_event.add_object(**vt_object) + self.misp_event.add_object(**file_object) + + +_misp_type_mapping = {'url': UrlQuery, 'md5': PayloadQuery, 'sha256': PayloadQuery, + 'domain': HostQuery, 'hostname': HostQuery, + 'ip-src': HostQuery, 'ip-dst': HostQuery} + + +def handler(q=False): + if q is False: + return False + request = json.loads(q) + attribute = request['attribute'] + urlhaus_parser = _misp_type_mapping[attribute['type']](attribute) + urlhaus_parser.query_api() + return urlhaus_parser.get_result() + + +def introspection(): + return mispattributes + + +def version(): + moduleinfo['config'] = moduleconfig + return moduleinfo diff --git a/misp_modules/modules/import_mod/__init__.py b/misp_modules/modules/import_mod/__init__.py index c3eea05..65a7069 100644 --- a/misp_modules/modules/import_mod/__init__.py +++ b/misp_modules/modules/import_mod/__init__.py @@ -1,3 +1,6 @@ from . import _vmray # noqa +import os +import sys +sys.path.append('{}/lib'.format('/'.join((os.path.realpath(__file__)).split('/')[:-3]))) -__all__ = ['vmray_import', 'ocr', 'cuckooimport', 'goamlimport', 'email_import', 'mispjson', 'openiocimport', 'threatanalyzer_import', 'csvimport'] +__all__ = ['vmray_import', 'ocr', 'cuckooimport', 'goamlimport', 'email_import', 'mispjson', 'openiocimport', 'threatanalyzer_import', 'csvimport', 'joe_import'] diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 0e42d94..5d7408c 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -1,35 +1,78 @@ # -*- coding: utf-8 -*- +from pymisp import MISPEvent, MISPObject +from pymisp import __path__ as pymisp_path +import csv +import io import json import os import base64 -import pymisp misperrors = {'error': 'Error'} moduleinfo = {'version': '0.1', 'author': 'Christian Studer', 'description': 'Import Attributes from a csv file.', 'module-type': ['import']} moduleconfig = [] -inputSource = ['file'] userConfig = {'header': { 'type': 'String', - 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}, + 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types or that you want to skip, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}, 'has_header': { 'type': 'Boolean', - 'message': 'Tick this box ONLY if there is a header line, NOT COMMENTED, in the file (which will be skipped atm).' + 'message': 'Tick this box ONLY if there is a header line, NOT COMMENTED, and all the fields of this header are respecting the recommendations above.' }} +mispattributes = {'userConfig': userConfig, 'inputSource': ['file'], 'format': 'misp_standard'} duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] +misp_standard_csv_header = ['uuid', 'event_id', 'category', 'type', 'value', 'comment', 'to_ids', 'date', + 'object_relation', 'attribute_tag', 'object_uuid', 'object_name', 'object_meta_category'] +misp_context_additional_fields = ['event_info', 'event_member_org', 'event_source_org', 'event_distribution', + 'event_threat_level_id', 'event_analysis', 'event_date', 'event_tag'] +misp_extended_csv_header = misp_standard_csv_header + misp_context_additional_fields delimiters = [',', ';', '|', '/', '\t', ' '] class CsvParser(): - def __init__(self, header, has_header): - self.header = header - self.fields_number = len(header) - self.has_header = has_header - self.attributes = [] + def __init__(self, header, has_header, data): + data_header = data[0] + self.misp_event = MISPEvent() + if data_header == misp_standard_csv_header or data_header == misp_extended_csv_header: + self.header = misp_standard_csv_header if data_header == misp_standard_csv_header else misp_extended_csv_header[:13] + self.from_misp = True + self.data = data[1:] + else: + self.from_misp = False + self.has_header = has_header + if header: + self.header = header + self.fields_number = len(header) + self.parse_data(data) + else: + self.has_delimiter = True + self.fields_number, self.delimiter, self.header = self.get_delimiter_from_header(data[0]) + self.data = data + descFilename = os.path.join(pymisp_path[0], 'data/describeTypes.json') + with open(descFilename, 'r') as f: + self.MispTypes = json.loads(f.read())['result'].get('types') + for h in self.header: + if not (h in self.MispTypes or h in misp_extended_csv_header): + misperrors['error'] = 'Wrong header field: {}. Please use a header value that can be recognized by MISP (or alternatively skip it using a whitespace).'.format(h) + return misperrors + + def get_delimiter_from_header(self, data): + delimiters_count = {} + for d in delimiters: + length = data.count(d) + if length > 0: + delimiters_count[d] = data.count(d) + if len(delimiters_count) == 0: + length = 0 + delimiter = None + header = [data] + else: + length, delimiter = max((n, v) for v, n in delimiters_count.items()) + header = data.split(delimiter) + return length + 1, delimiter, header def parse_data(self, data): return_data = [] @@ -48,6 +91,8 @@ class CsvParser(): return_data.append(line) # find which delimiter is used self.delimiter = self.find_delimiter() + if self.fields_number == 0: + self.header = return_data[0].split(self.delimiter) self.data = return_data[1:] if self.has_header else return_data def parse_delimiter(self, line): @@ -59,6 +104,38 @@ class CsvParser(): _, delimiter = max((n, v) for v, n in self.delimiter_count.items()) return delimiter + def parse_csv(self): + if self.from_misp: + self.build_misp_event() + else: + self.buildAttributes() + + def build_misp_event(self): + objects = {} + header_length = len(self.header) + attribute_fields = self.header[:1] + self.header[2:6] + self.header[7:8] + for line in self.data: + attribute = {} + try: + a_uuid, _, a_category, a_type, value, comment, to_ids, timestamp, relation, tag, o_uuid, o_name, o_category = line[:header_length] + except ValueError: + continue + for t, v in zip(attribute_fields, (a_uuid, a_category, a_type, value, comment, timestamp)): + attribute[t] = v.strip('"') + attribute['to_ids'] = True if to_ids == '1' else False + if tag: + attribute['Tag'] = [{'name': t.strip()} for t in tag.split(',')] + if relation: + if o_uuid not in objects: + objects[o_uuid] = MISPObject(o_name) + objects[o_uuid].add_attribute(relation, **attribute) + else: + self.misp_event.add_attribute(**attribute) + for uuid, misp_object in objects.items(): + misp_object.uuid = uuid + self.misp_event.add_object(**misp_object) + self.finalize_results() + def buildAttributes(self): # if there is only 1 field of data if self.delimiter is None: @@ -66,7 +143,7 @@ class CsvParser(): for data in self.data: d = data.strip() if d: - self.attributes.append({'types': mispType, 'values': d}) + self.misp_event.add_attribute(**{'type': mispType, 'value': d}) else: # split fields that should be recognized as misp attribute types from the others list2pop, misp, head = self.findMispTypes() @@ -82,23 +159,21 @@ class CsvParser(): datamisp.append(datasplit.pop(l).strip()) # for each misp type, we create an attribute for m, dm in zip(misp, datamisp): - attribute = {'types': m, 'values': dm} + attribute = {'type': m, 'value': dm} for h, ds in zip(head, datasplit): if h: attribute[h] = ds.strip() - self.attributes.append(attribute) + self.misp_event.add_attribute(**attribute) + self.finalize_results() def findMispTypes(self): - descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json') - with open(descFilename, 'r') as f: - MispTypes = json.loads(f.read())['result'].get('types') list2pop = [] misp = [] head = [] for h in reversed(self.header): n = self.header.index(h) # fields that are misp attribute types - if h in MispTypes: + if h in self.MispTypes: list2pop.append(n) misp.append(h) # handle confusions between misp attribute types and attribute fields @@ -118,6 +193,10 @@ class CsvParser(): # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields return list2pop, misp, list(reversed(head)) + def finalize_results(self): + event = json.loads(self.misp_event.to_json())['Event'] + self.results = {key: event[key] for key in ('Attribute', 'Object') if (key in event and event[key])} + def handler(q=False): if q is False: @@ -125,37 +204,29 @@ def handler(q=False): request = json.loads(q) if request.get('data'): data = base64.b64decode(request['data']).decode('utf-8') + data = [line for line in csv.reader(io.TextIOWrapper(io.BytesIO(data.encode()), encoding='utf-8'))] else: misperrors['error'] = "Unsupported attributes type" return misperrors - if not request.get('config') and not request['config'].get('header'): - misperrors['error'] = "Configuration error" - return misperrors - header = request['config'].get('header').split(',') - header = [c.strip() for c in header] has_header = request['config'].get('has_header') has_header = True if has_header == '1' else False - csv_parser = CsvParser(header, has_header) - csv_parser.parse_data(data.split('\n')) + if not request.get('config') and not request['config'].get('header'): + if has_header: + header = [] + else: + misperrors['error'] = "Configuration error" + return misperrors + else: + header = request['config'].get('header').split(',') + header = [c.strip() for c in header] + csv_parser = CsvParser(header, has_header, data) # build the attributes - csv_parser.buildAttributes() - r = {'results': csv_parser.attributes} - return r + csv_parser.parse_csv() + return {'results': csv_parser.results} def introspection(): - modulesetup = {} - try: - userConfig - modulesetup['userConfig'] = userConfig - except NameError: - pass - try: - inputSource - modulesetup['inputSource'] = inputSource - except NameError: - pass - return modulesetup + return mispattributes def version(): diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index 7116b44..79b4cfe 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -9,7 +9,8 @@ moduleinfo = {'version': 1, 'author': 'Christian Studer', 'description': 'Import from GoAML', 'module-type': ['import']} moduleconfig = [] -mispattributes = {'inputSource': ['file'], 'output': ['MISP objects']} +mispattributes = {'inputSource': ['file'], 'output': ['MISP objects'], + 'format': 'misp_standard'} t_from_objects = {'nodes': ['from_person', 'from_account', 'from_entity'], 'leaves': ['from_funds_code', 'from_country']} @@ -164,7 +165,7 @@ def handler(q=False): misperrors['error'] = "Impossible to read XML data" return misperrors aml_parser.parse_xml() - r = {'results': [obj.to_json() for obj in aml_parser.misp_event.objects]} + r = {'results': {'Object': [obj.to_json() for obj in aml_parser.misp_event.objects]}} return r diff --git a/misp_modules/modules/import_mod/joe_import.py b/misp_modules/modules/import_mod/joe_import.py new file mode 100644 index 0000000..d1c4d19 --- /dev/null +++ b/misp_modules/modules/import_mod/joe_import.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +import base64 +import json +from joe_parser import JoeParser + +misperrors = {'error': 'Error'} +userConfig = {} +inputSource = ['file'] + +moduleinfo = {'version': '0.1', 'author': 'Christian Studer', + 'description': 'Import for Joe Sandbox JSON reports', + 'module-type': ['import']} + +moduleconfig = [] + + +def handler(q=False): + if q is False: + return False + q = json.loads(q) + data = base64.b64decode(q.get('data')).decode('utf-8') + if not data: + return json.dumps({'success': 0}) + joe_parser = JoeParser() + joe_parser.parse_data(json.loads(data)['analysis']) + joe_parser.finalize_results() + return {'results': joe_parser.results} + + +def introspection(): + modulesetup = {} + try: + userConfig + modulesetup['userConfig'] = userConfig + except NameError: + pass + try: + inputSource + modulesetup['inputSource'] = inputSource + except NameError: + pass + modulesetup['format'] = 'misp_standard' + return modulesetup + + +def version(): + moduleinfo['config'] = moduleconfig + return moduleinfo