From f361fb4ee3f9de69bbe6d0c27c42af7e4f373769 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Tue, 20 Feb 2018 17:00:13 +0100 Subject: [PATCH 01/34] Reading the entire document, to create a big dictionary containing the data, as a beginning --- .../modules/import_mod/goamlimport.py | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 misp_modules/modules/import_mod/goamlimport.py diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py new file mode 100644 index 0000000..dbef826 --- /dev/null +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -0,0 +1,99 @@ +import json, datetime +import xml.etree.ElementTree as ET +from collections import defaultdict + +misperrors = {'error': 'Error'} +moduleinfo = {'version': 1, 'author': 'Christian Studer', + 'description': 'Import from GoAML', + 'module-type': ['import']} +moduleconfig = [] +mispattributes = {'input': ['xml file'], 'output': ['MISPEvent']} + +t_from = {'nodes': ['from_person', 'from_account', 'from_entity'], + 'leaves': ['from_funds_code', 'from_country']} +t_to = {'nodes': ['to_person', 'to_account', 'to_entity'], + 'leaves': ['to_funds_code', 'to_country']} +t_person = {'nodes': ['addresses'], + 'leaves': ['first_name', 'middle_name', 'last_name', 'gender', 'title', 'mothers_name', 'birthdate', + 'passport_number', 'passport_country', 'id_number', 'birth_place', 'alias', 'nationality1']} +t_account = {'nodes': ['signatory'], + 'leaves': ['institution_name', 'institution_code', 'swift', 'branch', 'non_banking_insitution', + 'account', 'currency_code', 'account_name', 'iban', 'client_number', 'opened', 'closed', + 'personal_account_type', 'balance', 'date_balance', 'status_code', 'beneficiary', + 'beneficiary_comment', 'comments']} +entity = {'nodes': ['addresses'], + 'leaves': ['name', 'commercial_name', 'incorporation_legal_form', 'incorporation_number', 'business', 'phone']} + +goAMLobjects = {'report': {'nodes': ['reporting_person', 'location', 'transaction'], + 'leaves': ['rentity_id', 'submission_code', 'report_code', 'submission_date', + 'currency_code_local']}, + 'reporting_person': {'nodes': ['addresses'], + 'leaves': ['first_name', 'middle_name', 'last_name', 'title']}, + 'location': {'nodes': [], + 'leaves': ['address_type', 'address', 'city', 'zip', 'country_code', 'state']}, + 'transaction': {'nodes': ['t_from', 't_from_my_client', 't_to', 't_to_my_client'], + 'leaves': ['transactionnumber', 'transaction_location', 'date_transaction', + 'transmode_code', 'amount_local']}, + 't_from': t_from, + 't_from_my_client': t_from, + 't_to': t_to, + 't_to_my_client': t_to, + 'addresses': {'nodes': ['address'], 'leaves': []}, + 'address': {'nodes': [], + 'leaves': ['address_type', 'address', 'city', 'zip', 'country_code', 'state']}, + 'from_person': t_person, + 'to_person': t_person, + 't_person': t_person, + 'from_account': t_account, + 'to_account': t_account, + 'signatory': {'nodes': ['t_person'], 'leaves': []}, + 'from_entity': entity, + 'to_entity': entity, + } + +class GoAmlParser(): + def __init__(self): + self.dict = defaultdict(list) + + def readFile(self, filename): + self.tree = ET.parse(filename).getroot() + + def parse_xml(self): + self.itterate(self.tree, 'report') + + def itterate(self, tree, aml_type): + elementDict = {} + for element in tree: + tag = element.tag + print(tag) + mapping = goAMLobjects.get(aml_type) + if tag in mapping.get('nodes'): + self.itterate(element, tag) + elif tag in mapping.get('leaves'): + elementDict[tag] = element.text + self.dict[aml_type].append(elementDict) + +def handler(q=False): + if q is False: + return False + request = json.loads(q) + if request.get('file'): + filename = request['file'] + else: + misperrors['error'] = "Unsupported attributes type" + return misperrors + aml_parser = GoAmlParser() + try: + aml_parser.readFile(filename) + except: + misperrors['error'] = "Impossible to read the file" + return misperrors + aml_parser.parse_xml() + return aml_parser.dict + +def introspection(): + return mispattributes + +def version(): + moduleinfo['config'] = moduleconfig + return moduleinfo From 5995458aab0179d50925a14bb12d1d3605176990 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Wed, 21 Feb 2018 17:14:26 +0100 Subject: [PATCH 02/34] fix: Added the moduleinfo field need to have MISP event in standard format --- misp_modules/modules/export_mod/goamlexport.py | 3 ++- misp_modules/modules/export_mod/pdfexport.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index 2859b3c..e678024 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -5,7 +5,8 @@ from collections import defaultdict, Counter misperrors = {'error': 'Error'} moduleinfo = {'version': '1', 'author': 'Christian Studer', 'description': 'Export to GoAML', - 'module-type': ['export']} + 'module-type': ['export'], + 'require_standard_format': True} moduleconfig = ['rentity_id'] mispattributes = {'input': ['MISPEvent'], 'output': ['xml file']} outputFileExtension = "xml" diff --git a/misp_modules/modules/export_mod/pdfexport.py b/misp_modules/modules/export_mod/pdfexport.py index 4ee7bd7..2aeaec7 100755 --- a/misp_modules/modules/export_mod/pdfexport.py +++ b/misp_modules/modules/export_mod/pdfexport.py @@ -15,7 +15,8 @@ misperrors = {'error': 'Error'} moduleinfo = {'version': '1', 'author': 'Raphaƫl Vinot', 'description': 'Simple export to PDF', - 'module-type': ['export']} + 'module-type': ['export'], + 'require_standard_format': True} moduleconfig = [] From 9b34602f73beac61083fa8669c0b40e6a2c0b787 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 22 Feb 2018 01:22:08 +0100 Subject: [PATCH 03/34] Added GoAML export module in description --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 11c28b4..67ba189 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ ### Export modules * [CEF](misp_modules/modules/export_mod/cef_export.py) module to export Common Event Format (CEF). +* [GoAML export](misp_modules/modules/export_mod/goamlexport.py) module to export in GoAML format. * [Lite Export](misp_modules/modules/export_mod/liteexport.py) module to export a lite event. * [Simple PDF export](misp_modules/modules/export_mod/pdfexport.py) module to export in PDF (required: asciidoctor-pdf). * [ThreatConnect](misp_modules/modules/export_mod/threat_connect_export.py) module to export in ThreatConnect CSV format. From c942013812512184fb79e2bd20dc39c070580c18 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 22 Feb 2018 01:23:08 +0100 Subject: [PATCH 04/34] chg: Modified the mapping dictionary to support misp-objects updates --- .../modules/export_mod/goamlexport.py | 61 +++++++++++-------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index e678024..e732584 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -14,26 +14,33 @@ responseType = "application/xml" objects_to_parse = ['transaction', 'bank-account', 'person', 'entity', 'geolocation'] -goAMLmapping = {'bank-account': 't_account', 'institution-code': 'institution_code', 'iban': 'iban', - 'swift': 'swift', 'branch': 'branch', 'non-banking-institution': 'non_bank_institution', - 'account': 'account', 'currency-code': 'currency_code', 'account-name': 'account_name', - 'client-number': 'client_number', 'personal-account-type': 'personal_account_type', - 'opened': 'opened', 'closed': 'closed', 'balance': 'balance', 'status-code': 'status_code', - 'beneficiary': 'beneficiary', 'beneficiary-comment': 'beneficiary_comment', 'comments': 'comments', - 'person': 't_person', 'text': 'comments', 'first-name': 'first_name', 'middle-name': 'middle_name', - 'last-name': 'last_name', 'mothers-name': 'mothers_name', 'title': 'title', 'alias': 'alias', - 'date-of-birth': 'birthdate', 'place-of-birth': 'birth_place', 'gender': 'gender', - 'passport-number': 'passport_number', 'passport-country': 'passport_country', - 'social-security-number': 'ssn', 'nationality': 'nationality1', 'identity-card-number': 'id_number', - 'geolocation': 'location', 'city': 'city', 'region': 'state', 'country': 'country-code', - 'address': 'address', 'zipcode': 'zip', - 'transaction': 'transaction', 'transaction-number': 'transactionnumber', 'date': 'date_transaction', - 'location': 'transaction_location', 'transmode-code': 'transmode_code', 'amount': 'amount_local', - 'transmode-comment': 'transmode_comment', 'date-posting': 'date_posting', 'teller': 'teller', - 'authorized': 'authorized', - 'legal-entity': 'entity', 'name': 'name', 'commercial-name': 'commercial_name', 'business': 'business', - 'legal-form': 'incorporation_legal_form', 'registration-number': 'incorporation_number', - 'phone-number': 'phone'} +goAMLmapping = {'bank-account': {'bank-account': 't_account', 'institution-name': 'institution_name', + 'institution-code': 'institution_code', 'iban': 'iban', 'swift': 'swift', + 'branch': 'branch', 'non-banking-institution': 'non_bank_institution', + 'account': 'account', 'currency-code': 'currency_code', + 'account-name': 'account_name', 'client-number': 'client_number', + 'personal-account-type': 'personal_account_type', 'opened': 'opened', + 'closed': 'closed', 'balance': 'balance', 'status-code': 'status_code', + 'beneficiary': 'beneficiary', 'beneficiary-comment': 'beneficiary_comment', + 'comments': 'comments'}, + 'person': {'person': 't_person', 'text': 'comments', 'first-name': 'first_name', + 'middle-name': 'middle_name', 'last-name': 'last_name', 'title': 'title', + 'mothers-name': 'mothers_name', 'alias': 'alias', 'date-of-birth': 'birthdate', + 'place-of-birth': 'birth_place', 'gender': 'gender','nationality': 'nationality1', + 'passport-number': 'passport_number', 'passport-country': 'passport_country', + 'social-security-number': 'ssn', 'identity-card-number': 'id_number'}, + 'geolocation': {'geolocation': 'location', 'city': 'city', 'region': 'state', + 'country': 'country-code', 'address': 'address', 'zipcode': 'zip'}, + 'transaction': {'transaction': 'transaction', 'transaction-number': 'transactionnumber', + 'date': 'date_transaction', 'location': 'transaction_location', + 'transmode-code': 'transmode_code', 'amount': 'amount_local', + 'transmode-comment': 'transmode_comment', 'date-posting': 'date_posting', + 'teller': 'teller', 'authorized': 'authorized', + 'text': 'transaction_description'}, + 'legal-enitty': {'legal-entity': 'entity', 'name': 'name', 'business': 'business', + 'commercial-name': 'commercial_name', 'phone-number': 'phone', + 'legal-form': 'incorporation_legal_form', + 'registration-number': 'incorporation_number'}} referencesMapping = {'bank-account': {'aml_type': '{}_account', 'bracket': 't_{}'}, 'person': {'transaction': {'aml_type': '{}_person', 'bracket': 't_{}'}, 'bank-account': {'aml_type': 't_person', 'bracket': 'signatory'}}, @@ -89,7 +96,7 @@ class GoAmlGeneration(object): obj = self.misp_event.get_object_by_uuid(uuid) if object_type == 'transaction': self.xml[xml_part] += "<{}>".format(aml_type) - self.fill_xml_transaction(obj.attributes, xml_part) + self.fill_xml_transaction(object_type, obj.attributes, xml_part) self.parsed_uuids[object_type].append(uuid) if obj.ObjectReference: self.parseObjectReferences(object_type, xml_part, obj.ObjectReference) @@ -105,7 +112,7 @@ class GoAmlGeneration(object): def itterate_normal_case(self, object_type, obj, aml_type, uuid, xml_part): self.xml[xml_part] += "<{}>".format(aml_type) - self.fill_xml(obj, xml_part) + self.fill_xml(object_type, obj, xml_part) self.parsed_uuids[object_type].append(uuid) if obj.ObjectReference: self.parseObjectReferences(object_type, xml_part, obj.ObjectReference) @@ -118,7 +125,7 @@ class GoAmlGeneration(object): relationship_type = ref.relationship_type self.parse_references(object_type, next_object_type, next_uuid, relationship_type, xml_part) - def fill_xml_transaction(self, attributes, xml_part): + def fill_xml_transaction(self, object_type, attributes, xml_part): from_and_to_fields = {'from': {}, 'to': {}} for attribute in attributes: object_relation = attribute.object_relation @@ -134,12 +141,12 @@ class GoAmlGeneration(object): from_and_to_fields[relation_type][field] = attribute_value continue try: - self.xml[xml_part] += "<{0}>{1}".format(goAMLmapping[object_relation], attribute_value) + self.xml[xml_part] += "<{0}>{1}".format(goAMLmapping[object_type][object_relation], attribute_value) except KeyError: pass self.from_and_to_fields = from_and_to_fields - def fill_xml(self, obj, xml_part): + def fill_xml(self, object_type, obj, xml_part): if obj.name == 'bank-account': for attribute in obj.attributes: if attribute.object_relation in ('personal-account-type', 'status-code'): @@ -147,13 +154,13 @@ class GoAmlGeneration(object): else: attribute_value = attribute.value try: - self.xml[xml_part] += "<{0}>{1}".format(goAMLmapping[attribute.object_relation], attribute_value) + self.xml[xml_part] += "<{0}>{1}".format(goAMLmapping[object_type][attribute.object_relation], attribute_value) except KeyError: pass else: for attribute in obj.attributes: try: - self.xml[xml_part] += "<{0}>{1}".format(goAMLmapping[attribute.object_relation], attribute.value) + self.xml[xml_part] += "<{0}>{1}".format(goAMLmapping[object_type][attribute.object_relation], attribute.value) except KeyError: pass From 694a63c8f3293f63bf0e241e6ad362f04e083ece Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 22 Feb 2018 10:29:05 +0100 Subject: [PATCH 05/34] add: Added an example file generated by GoAML export module --- tests/goamlexport.xml | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/goamlexport.xml diff --git a/tests/goamlexport.xml b/tests/goamlexport.xml new file mode 100644 index 0000000..ae3ea80 --- /dev/null +++ b/tests/goamlexport.xml @@ -0,0 +1 @@ +2510ESTR2018-02-22T08:34:16+00:00EURTW000009011 Manners Street WellingtonBG2015-12-01T10:03:0012345when it transactsEAAEUR31032027088ATTBVIThe bankNickPittSir1993-09-25Mulhouse, FranceMale
ParisFrance
FRA
KMichelJeanHimselfPrefer not to say
LuxembourgLuxembourg
LUX
From b2b0fccd47c42f54dff290d5b5a000cb0e134b93 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 22 Feb 2018 16:37:27 +0100 Subject: [PATCH 06/34] fix: Added an object checking - Checking if there are objects in the event, and then if there is at least 1 transaction object - This prevents the module from crashing, but does not guaranty having a valid GoAML file (depending on objects and their relations) --- misp_modules/modules/export_mod/goamlexport.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index e732584..76bbdc8 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -194,6 +194,15 @@ def handler(q=False): config = request['config'].get('rentity_id') export_doc = GoAmlGeneration(config) export_doc.from_event(request['data'][0]) + if not export_doc.misp_event.Object: + misperrors['error'] = "There is no object in this event." + return misperrors + types = [] + for obj in export_doc.misp_event.Object: + types.append(obj.name) + if 'transaction' not in types: + misperrors['error'] = "There is no transaction object in this event." + return misperrors export_doc.parse_objects() export_doc.build_xml() exp_doc = "{}{}".format(export_doc.xml.get('header'), export_doc.xml.get('data')) From 983b7da7b71ed3e5d9f139167400ecf5e8c49e6c Mon Sep 17 00:00:00 2001 From: Christian Studer Date: Thu, 22 Feb 2018 16:55:52 +0100 Subject: [PATCH 07/34] fix: Added an object checking - Checking if there are objects in the event, and then if there is at least 1 transaction object - This prevents the module from crashing, but does not guaranty having a valid GoAML file (depending on objects and their relations) --- misp_modules/modules/export_mod/goamlexport.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index e732584..76bbdc8 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -194,6 +194,15 @@ def handler(q=False): config = request['config'].get('rentity_id') export_doc = GoAmlGeneration(config) export_doc.from_event(request['data'][0]) + if not export_doc.misp_event.Object: + misperrors['error'] = "There is no object in this event." + return misperrors + types = [] + for obj in export_doc.misp_event.Object: + types.append(obj.name) + if 'transaction' not in types: + misperrors['error'] = "There is no transaction object in this event." + return misperrors export_doc.parse_objects() export_doc.build_xml() exp_doc = "{}{}".format(export_doc.xml.get('header'), export_doc.xml.get('data')) From 359ac9100ebb9ed41c853d228cc84e2d4cb2441c Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 23 Feb 2018 15:58:04 +0100 Subject: [PATCH 08/34] fix: typo in references mapping dictionary --- misp_modules/modules/export_mod/goamlexport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index 76bbdc8..f6d3ff5 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -44,7 +44,7 @@ goAMLmapping = {'bank-account': {'bank-account': 't_account', 'institution-name' referencesMapping = {'bank-account': {'aml_type': '{}_account', 'bracket': 't_{}'}, 'person': {'transaction': {'aml_type': '{}_person', 'bracket': 't_{}'}, 'bank-account': {'aml_type': 't_person', 'bracket': 'signatory'}}, - 'legal-entity': {'transaction': {'aml_type': '{}_entity', 'bracket': 't_{}'}, 'bank-account': {'aml_type': 'entity'}}, + 'legal-entity': {'transaction': {'aml_type': '{}_entity', 'bracket': 't_{}'}, 'bank-account': {'aml_type': 't_entity'}}, 'geolocation': {'aml_type': 'address', 'bracket': 'addresses'}} class GoAmlGeneration(object): From 81a6be17d3f567ced1517b216d8c847cbd1e0267 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 26 Feb 2018 11:47:35 +0100 Subject: [PATCH 09/34] chg: Structurded data --- misp_modules/modules/import_mod/goamlimport.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index dbef826..a2cda32 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -24,7 +24,7 @@ t_account = {'nodes': ['signatory'], entity = {'nodes': ['addresses'], 'leaves': ['name', 'commercial_name', 'incorporation_legal_form', 'incorporation_number', 'business', 'phone']} -goAMLobjects = {'report': {'nodes': ['reporting_person', 'location', 'transaction'], +goAMLobjects = {'report': {'nodes': ['reporting_person', 'location'], 'leaves': ['rentity_id', 'submission_code', 'report_code', 'submission_date', 'currency_code_local']}, 'reporting_person': {'nodes': ['addresses'], @@ -53,25 +53,27 @@ goAMLobjects = {'report': {'nodes': ['reporting_person', 'location', 'transactio class GoAmlParser(): def __init__(self): - self.dict = defaultdict(list) + self.dict = {} def readFile(self, filename): self.tree = ET.parse(filename).getroot() def parse_xml(self): - self.itterate(self.tree, 'report') + self.dict = self.itterate(self.tree, 'report') + self.dict['transaction'] = [] + for t in self.tree.findall('transaction'): + self.dict['transaction'].append(self.itterate(t, 'transaction')) def itterate(self, tree, aml_type): elementDict = {} for element in tree: tag = element.tag - print(tag) mapping = goAMLobjects.get(aml_type) if tag in mapping.get('nodes'): - self.itterate(element, tag) + elementDict[tag] = self.itterate(element, tag) elif tag in mapping.get('leaves'): elementDict[tag] = element.text - self.dict[aml_type].append(elementDict) + return elementDict def handler(q=False): if q is False: From 5df2d309a0e514b848746e9cb7ed5f1c9f11a9c8 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 26 Feb 2018 15:58:53 +0100 Subject: [PATCH 10/34] typo --- misp_modules/modules/export_mod/goamlexport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index f6d3ff5..f32aef2 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -37,7 +37,7 @@ goAMLmapping = {'bank-account': {'bank-account': 't_account', 'institution-name' 'transmode-comment': 'transmode_comment', 'date-posting': 'date_posting', 'teller': 'teller', 'authorized': 'authorized', 'text': 'transaction_description'}, - 'legal-enitty': {'legal-entity': 'entity', 'name': 'name', 'business': 'business', + 'legal-entity': {'legal-entity': 'entity', 'name': 'name', 'business': 'business', 'commercial-name': 'commercial_name', 'phone-number': 'phone', 'legal-form': 'incorporation_legal_form', 'registration-number': 'incorporation_number'}} From 478cd53912238c820928ebb615efb62dd987736e Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 26 Feb 2018 18:13:43 +0100 Subject: [PATCH 11/34] add: Added dictionary to map aml types into MISP types --- .../modules/import_mod/goamlimport.py | 71 ++++++++++++------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index a2cda32..ea6a3cb 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -1,6 +1,7 @@ import json, datetime import xml.etree.ElementTree as ET from collections import defaultdict +from pymisp import MISPEvent misperrors = {'error': 'Error'} moduleinfo = {'version': 1, 'author': 'Christian Studer', @@ -9,51 +10,72 @@ moduleinfo = {'version': 1, 'author': 'Christian Studer', moduleconfig = [] mispattributes = {'input': ['xml file'], 'output': ['MISPEvent']} -t_from = {'nodes': ['from_person', 'from_account', 'from_entity'], +t_from_objects = {'nodes': ['from_person', 'from_account', 'from_entity'], 'leaves': ['from_funds_code', 'from_country']} -t_to = {'nodes': ['to_person', 'to_account', 'to_entity'], +t_to_objects = {'nodes': ['to_person', 'to_account', 'to_entity'], 'leaves': ['to_funds_code', 'to_country']} -t_person = {'nodes': ['addresses'], +t_person_objects = {'nodes': ['addresses'], 'leaves': ['first_name', 'middle_name', 'last_name', 'gender', 'title', 'mothers_name', 'birthdate', 'passport_number', 'passport_country', 'id_number', 'birth_place', 'alias', 'nationality1']} -t_account = {'nodes': ['signatory'], +t_account_objects = {'nodes': ['signatory'], 'leaves': ['institution_name', 'institution_code', 'swift', 'branch', 'non_banking_insitution', 'account', 'currency_code', 'account_name', 'iban', 'client_number', 'opened', 'closed', 'personal_account_type', 'balance', 'date_balance', 'status_code', 'beneficiary', 'beneficiary_comment', 'comments']} -entity = {'nodes': ['addresses'], +entity_objects = {'nodes': ['addresses'], 'leaves': ['name', 'commercial_name', 'incorporation_legal_form', 'incorporation_number', 'business', 'phone']} goAMLobjects = {'report': {'nodes': ['reporting_person', 'location'], - 'leaves': ['rentity_id', 'submission_code', 'report_code', 'submission_date', - 'currency_code_local']}, - 'reporting_person': {'nodes': ['addresses'], - 'leaves': ['first_name', 'middle_name', 'last_name', 'title']}, - 'location': {'nodes': [], - 'leaves': ['address_type', 'address', 'city', 'zip', 'country_code', 'state']}, + 'leaves': ['rentity_id', 'submission_code', 'report_code', 'submission_date', 'currency_code_local']}, + 'reporting_person': {'nodes': ['addresses'], 'leaves': ['first_name', 'middle_name', 'last_name', 'title']}, + 'location': {'nodes': [], 'leaves': ['address_type', 'address', 'city', 'zip', 'country_code', 'state']}, 'transaction': {'nodes': ['t_from', 't_from_my_client', 't_to', 't_to_my_client'], 'leaves': ['transactionnumber', 'transaction_location', 'date_transaction', 'transmode_code', 'amount_local']}, - 't_from': t_from, - 't_from_my_client': t_from, - 't_to': t_to, - 't_to_my_client': t_to, + 't_from': t_from_objects, 't_from_my_client': t_from_objects, + 't_to': t_to_objects, 't_to_my_client': t_to_objects, 'addresses': {'nodes': ['address'], 'leaves': []}, - 'address': {'nodes': [], - 'leaves': ['address_type', 'address', 'city', 'zip', 'country_code', 'state']}, - 'from_person': t_person, - 'to_person': t_person, - 't_person': t_person, - 'from_account': t_account, - 'to_account': t_account, + 'address': {'nodes': [], 'leaves': ['address_type', 'address', 'city', 'zip', 'country_code', 'state']}, + 'from_person': t_person_objects, 'to_person': t_person_objects, 't_person': t_person_objects, + 'from_account': t_account_objects, 'to_account': t_account_objects, 'signatory': {'nodes': ['t_person'], 'leaves': []}, - 'from_entity': entity, - 'to_entity': entity, + 'from_entity': entity_objects, 'to_entity': entity_objects, } +t_account_mapping = {'t_account': 'bank-account', 'institution_name': 'institution-name', 'institution_code': 'institution-code', + 'iban': 'iban', 'swift': 'swift', 'branch': 'branch', 'non_banking_institution': 'non-bank-institution', + 'account': 'account', 'currency_code': 'currency-code', 'account_name': 'account-name', + 'client_number': 'client-number', 'personal_account_type': 'personal-account-type', 'opened': 'opened', + 'closed': 'closed', 'balance': 'balance', 'status_code': 'status-code', 'beneficiary': 'beneficiary', + 'beneficiary_comment': 'beneficiary-comment', 'comments': 'comments'} + +t_person_mapping = {'t_person': 'person', 'comments': 'text', 'first_name': 'first-name', 'middle_name': 'middle-name', + 'last_name': 'last-name', 'title': 'title', 'mothers_name': 'mothers-name', 'alias': 'alias', + 'birthdate': 'date-of-birth', 'birth_place': 'place-of-birth', 'gender': 'gender','nationality1': 'nationality', + 'passport_number': 'passport-number', 'passport_country': 'passport-country', 'ssn': 'social-security-number', + 'id_number': 'identity-card-number'} + +location_mapping = {'location': 'geolocation', 'city': 'city', 'state': 'region', 'country-code': 'country', 'address': 'address', + 'zip': 'zipcode'} + +t_entity_mapping = {'entity': 'legal-entity', 'name': 'name', 'business': 'business', 'commercial_name': 'commercial-name', + 'phone': 'phone-number', 'incorporation_legal_form': 'legal-form', 'incorporation_number': 'registration-number'} + +goAMLmapping = {'from_account': t_account_mapping, 'to_account': t_account_mapping, + 'from_person': t_person_mapping, 'to_person': t_person_mapping, 'reporting_person': t_person_mapping, + 'from_entity': t_entity_mapping, 'to_entity': t_entity_mapping, + 'location': location_mapping, 'address': location_mapping, + 'transaction': {'transaction': 'transaction', 'transactionnumber': 'transaction-number', 'date_transaction': 'date', + 'transaction_location': 'location', 'transmode_code': 'transmode-code', 'amount_local': 'amount', + 'transmode_comment': 'transmode-comment', 'date_posting': 'date-posting', 'teller': 'teller', + 'authorized': 'authorized', 'transaction_description': 'text'}} + +nodes_to_ignore = ['addresses', 'signatory'] + class GoAmlParser(): def __init__(self): self.dict = {} + self.misp_event = MISPEvent() def readFile(self, filename): self.tree = ET.parse(filename).getroot() @@ -63,6 +85,7 @@ class GoAmlParser(): self.dict['transaction'] = [] for t in self.tree.findall('transaction'): self.dict['transaction'].append(self.itterate(t, 'transaction')) + self.misp_event.timestamp = self.dict.get('submission_date') def itterate(self, tree, aml_type): elementDict = {} From a02dbd6a8dc595f734fb289e651dea8ee8465e40 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 26 Feb 2018 18:44:44 +0100 Subject: [PATCH 12/34] fix: Fixed typo of the aml type for country codes --- misp_modules/modules/export_mod/goamlexport.py | 2 +- tests/goamlexport.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/misp_modules/modules/export_mod/goamlexport.py b/misp_modules/modules/export_mod/goamlexport.py index f32aef2..c277640 100644 --- a/misp_modules/modules/export_mod/goamlexport.py +++ b/misp_modules/modules/export_mod/goamlexport.py @@ -30,7 +30,7 @@ goAMLmapping = {'bank-account': {'bank-account': 't_account', 'institution-name' 'passport-number': 'passport_number', 'passport-country': 'passport_country', 'social-security-number': 'ssn', 'identity-card-number': 'id_number'}, 'geolocation': {'geolocation': 'location', 'city': 'city', 'region': 'state', - 'country': 'country-code', 'address': 'address', 'zipcode': 'zip'}, + 'country': 'country_code', 'address': 'address', 'zipcode': 'zip'}, 'transaction': {'transaction': 'transaction', 'transaction-number': 'transactionnumber', 'date': 'date_transaction', 'location': 'transaction_location', 'transmode-code': 'transmode_code', 'amount': 'amount_local', diff --git a/tests/goamlexport.xml b/tests/goamlexport.xml index ae3ea80..4a001b9 100644 --- a/tests/goamlexport.xml +++ b/tests/goamlexport.xml @@ -1 +1 @@ -2510ESTR2018-02-22T08:34:16+00:00EURTW000009011 Manners Street WellingtonBG2015-12-01T10:03:0012345when it transactsEAAEUR31032027088ATTBVIThe bankNickPittSir1993-09-25Mulhouse, FranceMale
ParisFrance
FRA
KMichelJeanHimselfPrefer not to say
LuxembourgLuxembourg
LUX
+2510ESTR2018-02-22T08:34:16+00:00EURTW000009011 Manners Street WellingtonBG2015-12-01T10:03:0012345when it transactsEAAEUR31032027088ATTBVIThe bankNickPittSir1993-09-25Mulhouse, FranceMale
ParisFrance
FRA
KMichelJeanHimselfPrefer not to say
LuxembourgLuxembourg
LUX
From cad62464c5f19aefba17605315cad9d196cbc874 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Tue, 27 Feb 2018 11:08:37 +0100 Subject: [PATCH 13/34] Now parsing all the transaction attributes --- .../modules/import_mod/goamlimport.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index ea6a3cb..4cda375 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -88,15 +88,29 @@ class GoAmlParser(): self.misp_event.timestamp = self.dict.get('submission_date') def itterate(self, tree, aml_type): - elementDict = {} + element_dict = {} for element in tree: tag = element.tag mapping = goAMLobjects.get(aml_type) if tag in mapping.get('nodes'): - elementDict[tag] = self.itterate(element, tag) + if aml_type == 'transaction': + self.fill_transaction(element, element_dict, tag) + element_dict[tag] = self.itterate(element, tag) elif tag in mapping.get('leaves'): - elementDict[tag] = element.text - return elementDict + try: + element_dict[goAMLmapping[aml_type][tag]] = element.text + except KeyError: + pass + return element_dict + + @staticmethod + def fill_transaction(element, element_dict, tag): + if 't_from' in tag: + element_dict['from-funds-code'] = element.find('from_funds_code').text + element_dict['from-country'] = element.find('from_country').text + if 't_to' in tag: + element_dict['to-funds-code'] = element.find('to_funds_code').text + element_dict['to-country'] = element.find('to_country').text def handler(q=False): if q is False: From 8f5c08e2c6c52e5f06f6cbc1ff037fe86d354856 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Wed, 28 Feb 2018 15:07:55 +0100 Subject: [PATCH 14/34] Converting GoAML into MISPEvent --- .../modules/import_mod/goamlimport.py | 95 +++++++++++-------- 1 file changed, 58 insertions(+), 37 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index 4cda375..a9174db 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -1,7 +1,7 @@ import json, datetime import xml.etree.ElementTree as ET from collections import defaultdict -from pymisp import MISPEvent +from pymisp import MISPEvent, MISPObject misperrors = {'error': 'Error'} moduleinfo = {'version': 1, 'author': 'Christian Studer', @@ -18,12 +18,12 @@ t_person_objects = {'nodes': ['addresses'], 'leaves': ['first_name', 'middle_name', 'last_name', 'gender', 'title', 'mothers_name', 'birthdate', 'passport_number', 'passport_country', 'id_number', 'birth_place', 'alias', 'nationality1']} t_account_objects = {'nodes': ['signatory'], - 'leaves': ['institution_name', 'institution_code', 'swift', 'branch', 'non_banking_insitution', - 'account', 'currency_code', 'account_name', 'iban', 'client_number', 'opened', 'closed', - 'personal_account_type', 'balance', 'date_balance', 'status_code', 'beneficiary', - 'beneficiary_comment', 'comments']} + 'leaves': ['institution_name', 'institution_code', 'swift', 'branch', 'non_banking_insitution', + 'account', 'currency_code', 'account_name', 'iban', 'client_number', 'opened', 'closed', + 'personal_account_type', 'balance', 'date_balance', 'status_code', 'beneficiary', + 'beneficiary_comment', 'comments']} entity_objects = {'nodes': ['addresses'], - 'leaves': ['name', 'commercial_name', 'incorporation_legal_form', 'incorporation_number', 'business', 'phone']} + 'leaves': ['name', 'commercial_name', 'incorporation_legal_form', 'incorporation_number', 'business', 'phone']} goAMLobjects = {'report': {'nodes': ['reporting_person', 'location'], 'leaves': ['rentity_id', 'submission_code', 'report_code', 'submission_date', 'currency_code_local']}, @@ -42,30 +42,30 @@ goAMLobjects = {'report': {'nodes': ['reporting_person', 'location'], 'from_entity': entity_objects, 'to_entity': entity_objects, } -t_account_mapping = {'t_account': 'bank-account', 'institution_name': 'institution-name', 'institution_code': 'institution-code', +t_account_mapping = {'misp_name': 'bank-account', 'institution_name': 'institution-name', 'institution_code': 'institution-code', 'iban': 'iban', 'swift': 'swift', 'branch': 'branch', 'non_banking_institution': 'non-bank-institution', 'account': 'account', 'currency_code': 'currency-code', 'account_name': 'account-name', 'client_number': 'client-number', 'personal_account_type': 'personal-account-type', 'opened': 'opened', 'closed': 'closed', 'balance': 'balance', 'status_code': 'status-code', 'beneficiary': 'beneficiary', 'beneficiary_comment': 'beneficiary-comment', 'comments': 'comments'} -t_person_mapping = {'t_person': 'person', 'comments': 'text', 'first_name': 'first-name', 'middle_name': 'middle-name', +t_person_mapping = {'misp_name': 'person', 'comments': 'text', 'first_name': 'first-name', 'middle_name': 'middle-name', 'last_name': 'last-name', 'title': 'title', 'mothers_name': 'mothers-name', 'alias': 'alias', 'birthdate': 'date-of-birth', 'birth_place': 'place-of-birth', 'gender': 'gender','nationality1': 'nationality', 'passport_number': 'passport-number', 'passport_country': 'passport-country', 'ssn': 'social-security-number', 'id_number': 'identity-card-number'} -location_mapping = {'location': 'geolocation', 'city': 'city', 'state': 'region', 'country-code': 'country', 'address': 'address', - 'zip': 'zipcode'} +location_mapping = {'misp_name': 'geolocation', 'city': 'city', 'state': 'region', 'country_code': 'country', 'address': 'address', + 'zip': 'zipcode'} -t_entity_mapping = {'entity': 'legal-entity', 'name': 'name', 'business': 'business', 'commercial_name': 'commercial-name', +t_entity_mapping = {'misp_name': 'legal-entity', 'name': 'name', 'business': 'business', 'commercial_name': 'commercial-name', 'phone': 'phone-number', 'incorporation_legal_form': 'legal-form', 'incorporation_number': 'registration-number'} -goAMLmapping = {'from_account': t_account_mapping, 'to_account': t_account_mapping, +goAMLmapping = {'from_account': t_account_mapping, 'to_account': t_account_mapping, 't_person': t_person_mapping, 'from_person': t_person_mapping, 'to_person': t_person_mapping, 'reporting_person': t_person_mapping, 'from_entity': t_entity_mapping, 'to_entity': t_entity_mapping, 'location': location_mapping, 'address': location_mapping, - 'transaction': {'transaction': 'transaction', 'transactionnumber': 'transaction-number', 'date_transaction': 'date', + 'transaction': {'misp_name': 'transaction', 'transactionnumber': 'transaction-number', 'date_transaction': 'date', 'transaction_location': 'location', 'transmode_code': 'transmode-code', 'amount_local': 'amount', 'transmode_comment': 'transmode-comment', 'date_posting': 'date-posting', 'teller': 'teller', 'authorized': 'authorized', 'transaction_description': 'text'}} @@ -74,43 +74,64 @@ nodes_to_ignore = ['addresses', 'signatory'] class GoAmlParser(): def __init__(self): - self.dict = {} self.misp_event = MISPEvent() def readFile(self, filename): self.tree = ET.parse(filename).getroot() def parse_xml(self): - self.dict = self.itterate(self.tree, 'report') - self.dict['transaction'] = [] + self.first_itteration() for t in self.tree.findall('transaction'): - self.dict['transaction'].append(self.itterate(t, 'transaction')) - self.misp_event.timestamp = self.dict.get('submission_date') + self.itterate(t, 'transaction') + + def first_itteration(self): + self.misp_event.timestamp = self.tree.find('submission_date').text + for node in goAMLobjects['report']['nodes']: + element = self.tree.find(node) + if element is not None: + self.itterate(element, element.tag) def itterate(self, tree, aml_type): - element_dict = {} - for element in tree: - tag = element.tag - mapping = goAMLobjects.get(aml_type) - if tag in mapping.get('nodes'): + objects = goAMLobjects[aml_type] + if aml_type not in nodes_to_ignore: + try: + mapping = goAMLmapping[aml_type] + misp_object = MISPObject(name=mapping['misp_name']) + for leaf in objects['leaves']: + element = tree.find(leaf) + if element is not None: + object_relation = mapping[element.tag] + attribute = {'object_relation': object_relation, 'value': element.text} + misp_object.add_attribute(**attribute) if aml_type == 'transaction': - self.fill_transaction(element, element_dict, tag) - element_dict[tag] = self.itterate(element, tag) - elif tag in mapping.get('leaves'): - try: - element_dict[goAMLmapping[aml_type][tag]] = element.text - except KeyError: - pass - return element_dict + for node in objects['nodes']: + element = tree.find(node) + if element is not None: + self.fill_transaction(element, element.tag, misp_object) + self.misp_event.add_object(misp_object) + except KeyError: + pass + for node in objects['nodes']: + element = tree.find(node) + if element is not None: + self.itterate(element, element.tag) @staticmethod - def fill_transaction(element, element_dict, tag): + def fill_transaction(element, tag, misp_object): if 't_from' in tag: - element_dict['from-funds-code'] = element.find('from_funds_code').text - element_dict['from-country'] = element.find('from_country').text + from_funds = element.find('from_funds_code').text + from_funds_attribute = {'object_relation': 'from-funds-code', 'value': from_funds} + misp_object.add_attribute(**from_funds_attribute) + from_country = element.find('from_country').text + from_country_attribute = {'object_relation': 'from-country', 'value': from_country} + misp_object.add_attribute(**from_country_attribute) if 't_to' in tag: - element_dict['to-funds-code'] = element.find('to_funds_code').text - element_dict['to-country'] = element.find('to_country').text + to_funds = element.find('to_funds_code').text + to_funds_attribute = {'object_relation': 'to-funds-code', 'value': to_funds} + misp_object.add_attribute(**to_funds_attribute) + to_country = element.find('to_country').text + to_country_attribute = {'object_relation': 'to-country', 'value': to_country} + misp_object.add_attribute(**to_country_attribute) def handler(q=False): if q is False: @@ -128,7 +149,7 @@ def handler(q=False): misperrors['error'] = "Impossible to read the file" return misperrors aml_parser.parse_xml() - return aml_parser.dict + return aml_parser.misp_event.to_json() def introspection(): return mispattributes From 323f71cdd3f1a253a520af07b6f7a56a262be693 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Wed, 28 Feb 2018 17:41:45 +0100 Subject: [PATCH 15/34] Fixed some details about the module output --- misp_modules/modules/import_mod/goamlimport.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index a9174db..10bdaab 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -1,4 +1,4 @@ -import json, datetime +import json, datetime, time import xml.etree.ElementTree as ET from collections import defaultdict from pymisp import MISPEvent, MISPObject @@ -85,7 +85,8 @@ class GoAmlParser(): self.itterate(t, 'transaction') def first_itteration(self): - self.misp_event.timestamp = self.tree.find('submission_date').text + submission_date = self.tree.find('submission_date').text.split('+')[0] + self.misp_event.timestamp = int(time.mktime(time.strptime(submission_date, "%Y-%m-%dT%H:%M:%S"))) for node in goAMLobjects['report']['nodes']: element = self.tree.find(node) if element is not None: @@ -149,7 +150,8 @@ def handler(q=False): misperrors['error'] = "Impossible to read the file" return misperrors aml_parser.parse_xml() - return aml_parser.misp_event.to_json() + r = {'results': [{'types': mispattributes['output'], 'values': aml_parser.misp_event.to_json()}]} + return r def introspection(): return mispattributes From 03d20856d9c71b2778e4b5d13b29099c0b0a0f56 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Wed, 28 Feb 2018 22:46:39 +0100 Subject: [PATCH 16/34] add: added goamlimport --- misp_modules/modules/import_mod/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misp_modules/modules/import_mod/__init__.py b/misp_modules/modules/import_mod/__init__.py index 8acccbd..886eaf7 100644 --- a/misp_modules/modules/import_mod/__init__.py +++ b/misp_modules/modules/import_mod/__init__.py @@ -1,4 +1,4 @@ from . import _vmray -__all__ = ['vmray_import', 'testimport', 'ocr', 'stiximport', 'cuckooimport', +__all__ = ['vmray_import', 'testimport', 'ocr', 'stiximport', 'cuckooimport', 'goamlimport', 'email_import', 'mispjson', 'openiocimport', 'threatanalyzer_import', 'csvimport'] From e6c55f5ddec38bf56df2a9ff6ba65b8ff213170e Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 2 Mar 2018 09:03:51 +0100 Subject: [PATCH 17/34] fix: Fixed input & output of the module Also updated some functions --- misp_modules/modules/import_mod/csvimport.py | 43 ++++++++------------ 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index dc67eec..3773530 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -import json, os +import json, os, base64 import pymisp misperrors = {'error': 'Error'} -mispattributes = {'input': ['file'], 'output': ['MISP attributes']} +mispattributes = {'inputSource': ['file'], 'output': ['MISP attributes']} moduleinfo = {'version': '0.1', 'author': 'Christian Studer', 'description': 'Import Attributes from a csv file.', 'module-type': ['import']} @@ -16,39 +16,32 @@ def handler(q=False): if q is False: return False request = json.loads(q) - if request.get('file'): - filename = request['file'] + if request.get('data'): + data = base64.b64decode(request['data']).decode('utf-8') else: misperrors['error'] = "Unsupported attributes type" return misperrors if not request.get('config') and not request['config'].get('header'): misperrors['error'] = "Configuration error" return misperrors - config = request['config'].get('header') - #header = [] - try: - data = readFile(filename, 'utf-8') - except: - data = readFile(filename, 'iso-8859-1') + config = request['config'].get('header').split(',') + config = [c.strip() for c in config] + data = parse_data(data.split('\n')) # find which delimiter is used delimiter, length = findDelimiter(config, data) # build the attributes result = buildAttributes(config, data, delimiter, length) - r = {'results': [{'types': mispattributes['output'], 'values': result}]} + r = {'results': result} return r -def readFile(filename, encoding): - data = [] - with open(filename, 'r', encoding=encoding) as f: - for line in f: - # split comments from data - if '#' in line: - l = line.split('#')[0].strip() - else: - l = line.strip() - if l: - data.append(l) - return data +def parse_data(data): + return_data = [] + for line in data: + l = line.split('#')[0].strip() if '#' in line else line.strip() + if l: + return_data.append(l) + print(len(return_data)) + return return_data def findDelimiter(header, data): n = len(header) @@ -74,7 +67,7 @@ def buildAttributes(header, dataValues, delimiter, length): for data in dataValues: d = data.strip() if d: - attributes.append({'type': mispType, 'value': d}) + attributes.append({'types': mispType, 'values': d}) else: # split fields that should be recognized as misp attribute types from the others list2pop, misp, head = findMispTypes(header) @@ -90,7 +83,7 @@ def buildAttributes(header, dataValues, delimiter, length): datamisp.append(datasplit.pop(l).strip()) # for each misp type, we create an attribute for m, dm in zip(misp, datamisp): - attribute = {'type': m, 'value': dm} + attribute = {'types': m, 'values': dm} for h, ds in zip(head, datasplit): if h: attribute[h] = ds.strip() From c9ef57826219878acab6fa69a2e643086c35b818 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 2 Mar 2018 09:09:12 +0100 Subject: [PATCH 18/34] Removed print --- misp_modules/modules/import_mod/csvimport.py | 1 - 1 file changed, 1 deletion(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 3773530..5cfbc67 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -40,7 +40,6 @@ def parse_data(data): l = line.split('#')[0].strip() if '#' in line else line.strip() if l: return_data.append(l) - print(len(return_data)) return return_data def findDelimiter(header, data): From 82fe8ba78ca5d46643f3ef2c29ed51d82084cfd1 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 2 Mar 2018 11:03:21 +0100 Subject: [PATCH 19/34] fix: Fixed input & output of the module --- misp_modules/modules/import_mod/goamlimport.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index 10bdaab..9b2a34c 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -1,4 +1,4 @@ -import json, datetime, time +import json, datetime, time, base64 import xml.etree.ElementTree as ET from collections import defaultdict from pymisp import MISPEvent, MISPObject @@ -8,7 +8,7 @@ moduleinfo = {'version': 1, 'author': 'Christian Studer', 'description': 'Import from GoAML', 'module-type': ['import']} moduleconfig = [] -mispattributes = {'input': ['xml file'], 'output': ['MISPEvent']} +mispattributes = {'inputSource': ['file'], 'output': ['MISP objects']} t_from_objects = {'nodes': ['from_person', 'from_account', 'from_entity'], 'leaves': ['from_funds_code', 'from_country']} @@ -76,8 +76,8 @@ class GoAmlParser(): def __init__(self): self.misp_event = MISPEvent() - def readFile(self, filename): - self.tree = ET.parse(filename).getroot() + def read_xml(self, data): + self.tree = ET.fromstring(data) def parse_xml(self): self.first_itteration() @@ -138,19 +138,19 @@ def handler(q=False): if q is False: return False request = json.loads(q) - if request.get('file'): - filename = request['file'] + if request.get('data'): + data = base64.b64decode(request['data']).decode('utf-8') else: misperrors['error'] = "Unsupported attributes type" return misperrors aml_parser = GoAmlParser() try: - aml_parser.readFile(filename) + aml_parser.read_xml(data) except: - misperrors['error'] = "Impossible to read the file" + misperrors['error'] = "Impossible to read XML data" return misperrors aml_parser.parse_xml() - r = {'results': [{'types': mispattributes['output'], 'values': aml_parser.misp_event.to_json()}]} + r = {'results': [obj.to_json() for obj in aml_parser.misp_event.objects]} return r def introspection(): From 4d7642ac91e6f8694ee83b050368e8a6bfff944e Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 5 Mar 2018 14:58:31 +0100 Subject: [PATCH 20/34] add: Added Object References in the objects imported --- misp_modules/modules/import_mod/goamlimport.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/misp_modules/modules/import_mod/goamlimport.py b/misp_modules/modules/import_mod/goamlimport.py index 9b2a34c..ecb0a2d 100644 --- a/misp_modules/modules/import_mod/goamlimport.py +++ b/misp_modules/modules/import_mod/goamlimport.py @@ -71,6 +71,7 @@ goAMLmapping = {'from_account': t_account_mapping, 'to_account': t_account_mappi 'authorized': 'authorized', 'transaction_description': 'text'}} nodes_to_ignore = ['addresses', 'signatory'] +relationship_to_keep = ['signatory', 't_from', 't_from_my_client', 't_to', 't_to_my_client', 'address'] class GoAmlParser(): def __init__(self): @@ -92,8 +93,10 @@ class GoAmlParser(): if element is not None: self.itterate(element, element.tag) - def itterate(self, tree, aml_type): + def itterate(self, tree, aml_type, referencing_uuid=None, relationship_type=None): objects = goAMLobjects[aml_type] + referenced_uuid = referencing_uuid + rel = relationship_type if aml_type not in nodes_to_ignore: try: mapping = goAMLmapping[aml_type] @@ -110,12 +113,20 @@ class GoAmlParser(): if element is not None: self.fill_transaction(element, element.tag, misp_object) self.misp_event.add_object(misp_object) + last_object = self.misp_event.objects[-1] + referenced_uuid = last_object.uuid + if referencing_uuid and relationship_type: + referencing_object = self.misp_event.get_object_by_uuid(referencing_uuid) + referencing_object.add_reference(referenced_uuid, rel, None, **last_object) except KeyError: pass for node in objects['nodes']: element = tree.find(node) if element is not None: - self.itterate(element, element.tag) + tag = element.tag + if tag in relationship_to_keep: + rel = tag[2:] if tag.startswith('t_') else tag + self.itterate(element, element.tag, referencing_uuid=referenced_uuid, relationship_type=rel) @staticmethod def fill_transaction(element, tag, misp_object): From d8852867927399372911221756a9b38774af90d7 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 5 Mar 2018 19:59:30 +0100 Subject: [PATCH 21/34] Clarified functions arguments using a class --- misp_modules/modules/import_mod/csvimport.py | 180 ++++++++++--------- 1 file changed, 92 insertions(+), 88 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 5cfbc67..543d67b 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -10,7 +10,94 @@ moduleinfo = {'version': '0.1', 'author': 'Christian Studer', moduleconfig = ['header'] duplicatedFields = {'mispType': {'mispComment': 'comment'}, - 'attrField': {'eventComment': 'comment'}} + 'attrField': {'attrComment': 'comment'}} + +class CsvParser(): + def __init__(self, header): + self.header = header + self.attributes = [] + + def parse_data(self, data): + return_data = [] + for line in data: + l = line.split('#')[0].strip() if '#' in line else line.strip() + if l: + return_data.append(l) + self.data = return_data + # find which delimiter is used + self.delimiter, self.length = self.findDelimiter() + + def findDelimiter(self): + n = len(self.header) + if n > 1: + tmpData = [] + for da in self.data: + tmp = [] + for d in (';', '|', '/', ',', '\t', ' ',): + if da.count(d) == (n-1): + tmp.append(d) + if len(tmp) == 1 and tmp == tmpData: + return tmpData[0], n + else: + tmpData = tmp + else: + return None, 1 + + def buildAttributes(self): + # if there is only 1 field of data + if self.delimiter is None: + mispType = self.header[0] + for data in self.data: + d = data.strip() + if d: + self.attributes.append({'types': mispType, 'values': d}) + else: + # split fields that should be recognized as misp attribute types from the others + list2pop, misp, head = self.findMispTypes() + # for each line of data + for data in self.data: + datamisp = [] + datasplit = data.split(self.delimiter) + # in case there is an empty line or an error + if len(datasplit) != self.length: + continue + # pop from the line data that matches with a misp type, using the list of indexes + for l in list2pop: + datamisp.append(datasplit.pop(l).strip()) + # for each misp type, we create an attribute + for m, dm in zip(misp, datamisp): + attribute = {'types': m, 'values': dm} + for h, ds in zip(head, datasplit): + if h: + attribute[h] = ds.strip() + self.attributes.append(attribute) + + def findMispTypes(self): + descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json') + with open(descFilename, 'r') as f: + MispTypes = json.loads(f.read())['result'].get('types') + list2pop = [] + misp = [] + head = [] + for h in reversed(self.header): + n = self.header.index(h) + # fields that are misp attribute types + if h in MispTypes: + list2pop.append(n) + misp.append(h) + # handle confusions between misp attribute types and attribute fields + elif h in duplicatedFields['mispType']: + # fields that should be considered as misp attribute types + list2pop.append(n) + misp.append(duplicatedFields['mispType'].get(h)) + elif h in duplicatedFields['attrField']: + # fields that should be considered as attribute fields + head.append(duplicatedFields['attrField'].get(h)) + # otherwise, it is an attribute field + else: + head.append(h) + # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields + return list2pop, misp, list(reversed(head)) def handler(q=False): if q is False: @@ -26,96 +113,13 @@ def handler(q=False): return misperrors config = request['config'].get('header').split(',') config = [c.strip() for c in config] - data = parse_data(data.split('\n')) - # find which delimiter is used - delimiter, length = findDelimiter(config, data) + csv_parser = CsvParser(config) + csv_parser.parse_data(data.split('\n')) # build the attributes - result = buildAttributes(config, data, delimiter, length) - r = {'results': result} + csv_parser.buildAttributes() + r = {'results': csv_parser.attributes} return r -def parse_data(data): - return_data = [] - for line in data: - l = line.split('#')[0].strip() if '#' in line else line.strip() - if l: - return_data.append(l) - return return_data - -def findDelimiter(header, data): - n = len(header) - if n > 1: - tmpData = [] - for da in data: - tmp = [] - for d in (';', '|', '/', ',', '\t', ' ',): - if da.count(d) == (n-1): - tmp.append(d) - if len(tmp) == 1 and tmp == tmpData: - return tmpData[0], n - else: - tmpData = tmp - else: - return None, 1 - -def buildAttributes(header, dataValues, delimiter, length): - attributes = [] - # if there is only 1 field of data - if delimiter is None: - mispType = header[0] - for data in dataValues: - d = data.strip() - if d: - attributes.append({'types': mispType, 'values': d}) - else: - # split fields that should be recognized as misp attribute types from the others - list2pop, misp, head = findMispTypes(header) - # for each line of data - for data in dataValues: - datamisp = [] - datasplit = data.split(delimiter) - # in case there is an empty line or an error - if len(datasplit) != length: - continue - # pop from the line data that matches with a misp type, using the list of indexes - for l in list2pop: - datamisp.append(datasplit.pop(l).strip()) - # for each misp type, we create an attribute - for m, dm in zip(misp, datamisp): - attribute = {'types': m, 'values': dm} - for h, ds in zip(head, datasplit): - if h: - attribute[h] = ds.strip() - attributes.append(attribute) - return attributes - -def findMispTypes(header): - descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json') - with open(descFilename, 'r') as f: - MispTypes = json.loads(f.read())['result'].get('types') - list2pop = [] - misp = [] - head = [] - for h in reversed(header): - n = header.index(h) - # fields that are misp attribute types - if h in MispTypes: - list2pop.append(n) - misp.append(h) - # handle confusions between misp attribute types and attribute fields - elif h in duplicatedFields['mispType']: - # fields that should be considered as misp attribute types - list2pop.append(n) - misp.append(duplicatedFields['mispType'].get(h)) - elif h in duplicatedFields['attrField']: - # fields that should be considered as attribute fields - head.append(duplicatedFields['attrField'].get(h)) - # otherwise, it is an attribute field - else: - head.append(h) - # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields - return list2pop, misp, list(reversed(head)) - def introspection(): return mispattributes From 0436118747d4a199e8b42e0b19840767dc824859 Mon Sep 17 00:00:00 2001 From: "x41\\x43" Date: Tue, 6 Mar 2018 18:12:36 +0100 Subject: [PATCH 22/34] Improving regex (validating e-mail) Line 48: The previous regex ` ^[\w\.\+\-]+\@[\w]+\.[a-z]{2,3}$ ` matched only a small subset of valid e-mail address (e.g.: didn't match domain names longer than 3 chars or user@this-domain.de or user@multiple.level.dom) and needed to be with start (^) and end ($). This ` [a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])? ` is not perfect (e.g: can't match oriental chars), but imho is much more complete. Regex tested with several e-mail addresses with Python 3.6.4 and Python 2.7.14 on Linux 4.14. --- misp_modules/modules/expansion/otx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misp_modules/modules/expansion/otx.py b/misp_modules/modules/expansion/otx.py index ad9da2f..214e7f0 100755 --- a/misp_modules/modules/expansion/otx.py +++ b/misp_modules/modules/expansion/otx.py @@ -45,7 +45,7 @@ def findAll(data, keys): return a def valid_email(email): - return bool(re.search(r"^[\w\.\+\-]+\@[\w]+\.[a-z]{2,3}$", email)) + return bool(re.search(r"[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?", email)) def handler(q=False): if q is False: From d0f618b6480a306743e864f7f9cfb769c676c5eb Mon Sep 17 00:00:00 2001 From: Fred Morris Date: Thu, 8 Mar 2018 15:26:39 -0800 Subject: [PATCH 23/34] Add exception blocks for query errors. --- .../modules/expansion/farsight_passivedns.py | 45 +++++++++++-------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/misp_modules/modules/expansion/farsight_passivedns.py b/misp_modules/modules/expansion/farsight_passivedns.py index c76c752..2518771 100755 --- a/misp_modules/modules/expansion/farsight_passivedns.py +++ b/misp_modules/modules/expansion/farsight_passivedns.py @@ -1,5 +1,5 @@ import json -from ._dnsdb_query.dnsdb_query import DnsdbClient +from ._dnsdb_query.dnsdb_query import DnsdbClient, QueryError misperrors = {'error': 'Error'} @@ -41,26 +41,35 @@ def handler(q=False): def lookup_name(client, name): - res = client.query_rrset(name) # RRSET = entries in the left-hand side of the domain name related labels - for item in res: - if item.get('rrtype') in ['A', 'AAAA', 'CNAME']: - for i in item.get('rdata'): - yield(i.rstrip('.')) - if item.get('rrtype') in ['SOA']: - for i in item.get('rdata'): - # grab email field and replace first dot by @ to convert to an email address - yield(i.split(' ')[1].rstrip('.').replace('.', '@', 1)) - # res = client.query_rdata_name(name) # RDATA = entries on the right-hand side of the domain name related labels - # for item in res: - # if item.get('rrtype') in ['A', 'AAAA', 'CNAME']: - # yield(item.get('rrname').rstrip('.')) + try: + res = client.query_rrset(name) # RRSET = entries in the left-hand side of the domain name related labels + for item in res: + if item.get('rrtype') in ['A', 'AAAA', 'CNAME']: + for i in item.get('rdata'): + yield(i.rstrip('.')) + if item.get('rrtype') in ['SOA']: + for i in item.get('rdata'): + # grab email field and replace first dot by @ to convert to an email address + yield(i.split(' ')[1].rstrip('.').replace('.', '@', 1)) + except QueryError as e: + pass + + try: + res = client.query_rdata_name(name) # RDATA = entries on the right-hand side of the domain name related labels + for item in res: + if item.get('rrtype') in ['A', 'AAAA', 'CNAME']: + yield(item.get('rrname').rstrip('.')) + except QueryError as e: + pass def lookup_ip(client, ip): - res = client.query_rdata_ip(ip) - for item in res: - print(item) - yield(item['rrname'].rstrip('.')) + try: + res = client.query_rdata_ip(ip) + for item in res: + yield(item['rrname'].rstrip('.')) + except QueryError as e: + pass def introspection(): From 6d23d4f4c7c553d337606be1506a0965baf114cd Mon Sep 17 00:00:00 2001 From: Koen Van Impe Date: Fri, 30 Mar 2018 15:11:25 +0200 Subject: [PATCH 24/34] Fix VMRay API access error hotfix for the "Unable to access VMRay API" error --- misp_modules/modules/import_mod/vmray_import.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/misp_modules/modules/import_mod/vmray_import.py b/misp_modules/modules/import_mod/vmray_import.py index a263294..88caa8f 100644 --- a/misp_modules/modules/import_mod/vmray_import.py +++ b/misp_modules/modules/import_mod/vmray_import.py @@ -93,7 +93,10 @@ def handler(q=False): analysis_data = vmrayDownloadAnalysis(api, analysis_id) if analysis_data: - p = vmrayVtiPatterns(analysis_data["vti_patterns"]) + if "analysis_vti_patterns" in analysis_data: + p = vmrayVtiPatterns(analysis_data["analysis_vti_patterns"]) + else: + p = vmrayVtiPatterns(analysis_data["vti_patterns"]) if p and len(p["results"]) > 0: vti_patterns_found = True vmray_results = {'results': vmray_results["results"] + p["results"]} From 252d19071495d678ded05841ed492b23613f67f1 Mon Sep 17 00:00:00 2001 From: Nick Driver Date: Fri, 30 Mar 2018 14:27:37 -0400 Subject: [PATCH 25/34] fix missing comma fix ip-dst and vulnerability input --- misp_modules/modules/expansion/xforceexchange.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misp_modules/modules/expansion/xforceexchange.py b/misp_modules/modules/expansion/xforceexchange.py index 459c69a..d027f99 100644 --- a/misp_modules/modules/expansion/xforceexchange.py +++ b/misp_modules/modules/expansion/xforceexchange.py @@ -14,7 +14,7 @@ extensions = {"ip1": "ipr/%s", sys.path.append('./') misperrors = {'error': 'Error'} -mispattributes = {'input': ['ip-src','ip-dst' 'vulnerability', 'md5', 'sha1', 'sha256'], +mispattributes = {'input': ['ip-src', 'ip-dst', 'vulnerability', 'md5', 'sha1', 'sha256'], 'output': ['ip-src', 'ip-dst', 'text', 'domain']} # possible module-types: 'expansion', 'hover' or both From 370011c0817f8f773eed0f1406c2fea85ca94bf5 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 2 May 2018 12:43:34 +0200 Subject: [PATCH 26/34] threatanalyzer_import - fix regkey issue --- misp_modules/modules/import_mod/threatanalyzer_import.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py index fded508..fd16246 100755 --- a/misp_modules/modules/import_mod/threatanalyzer_import.py +++ b/misp_modules/modules/import_mod/threatanalyzer_import.py @@ -457,8 +457,6 @@ def cleanup_regkey(item): r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag', r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\' } - item = item.replace('\\REGISTRY\\MACHINE\\', 'HKLM\\') - item = item.replace('\\REGISTRY\\USER\\', 'HKCU\\') if list_in_string(noise_substrings, item, regex=True): return None return item From 27a22e5d86ecb1b7c2f81cfa1f02401464270cd1 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Thu, 3 May 2018 09:42:38 +0200 Subject: [PATCH 27/34] threatanalyzer_import - loads sample info + pollution fix --- .../import_mod/threatanalyzer_import.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py index fd16246..da01b93 100755 --- a/misp_modules/modules/import_mod/threatanalyzer_import.py +++ b/misp_modules/modules/import_mod/threatanalyzer_import.py @@ -62,12 +62,12 @@ def handler(q=False): if re.match(r"Analysis/proc_\d+/modified_files/.+\.", zip_file_name) and "mapping.log" not in zip_file_name: sample_md5 = zip_file_name.split('/')[-1].split('.')[0] if sample_md5 in modified_files_mapping: - sample_filename = modified_files_mapping[sample_md5] - # print("{} maps to {}".format(sample_md5, sample_filename)) + current_sample_filename = modified_files_mapping[sample_md5] + # print("{} maps to {}".format(sample_md5, current_sample_filename)) with zf.open(zip_file_name, mode='r', pwd=None) as fp: file_data = fp.read() results.append({ - 'values': sample_filename, + 'values': current_sample_filename, 'data': base64.b64encode(file_data).decode(), 'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''}) @@ -76,8 +76,18 @@ def handler(q=False): file_data = fp.read() analysis_json = json.loads(file_data.decode('utf-8')) results += process_analysis_json(analysis_json) - # if 'sample' in zip_file_name: - # sample['data'] = base64.b64encode(file_data).decode() + try: + sample_filename = analysis_json.get('analysis').get('@filename') + if sample_filename: + with zf.open('sample', mode='r', pwd=None) as fp: + file_data = fp.read() + results.append({ + 'values': sample_filename, + 'data': base64.b64encode(file_data).decode(), + 'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''}) + except Exception as e: + # no 'sample' in archive, might be an url analysis, just ignore + pass else: try: @@ -455,7 +465,9 @@ def cleanup_regkey(item): r'\\Software\\Classes\\CLSID\\', r'\\Software\\Classes\\Local Settings\\MuiCache\\', r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag', - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\' + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\', + r'\\Software\\Microsoft\\Tracing\\powershell_RASMANCS\\', + r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\' } if list_in_string(noise_substrings, item, regex=True): return None From 7b4db1ce5ce99491a47b2cdb006a5bf80542e676 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 15 May 2018 12:59:55 +0200 Subject: [PATCH 28/34] threatanalyzer_import - minor generic noise removal --- .../import_mod/threatanalyzer_import.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py index da01b93..326de89 100755 --- a/misp_modules/modules/import_mod/threatanalyzer_import.py +++ b/misp_modules/modules/import_mod/threatanalyzer_import.py @@ -15,7 +15,7 @@ misperrors = {'error': 'Error'} userConfig = {} inputSource = ['file'] -moduleinfo = {'version': '0.6', 'author': 'Christophe Vandeplas', +moduleinfo = {'version': '0.7', 'author': 'Christophe Vandeplas', 'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files', 'module-type': ['import']} @@ -451,23 +451,24 @@ def cleanup_filepath(item): def cleanup_regkey(item): noise_substrings = { - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\', - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\', + r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith', r'\\CurrentVersion\\Explorer\\RecentDocs\\', r'\\CurrentVersion\\Explorer\\UserAssist\\', - r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith', - r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch', - r'\\Software\\Microsoft\\Office\\[0-9\.]+\\', - r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\', - r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\', - r'\\Usage\\SpellingAndGrammarFiles', - r'^HKLM\\Software\\Microsoft\\Tracing\\', + r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag', r'\\Software\\Classes\\CLSID\\', r'\\Software\\Classes\\Local Settings\\MuiCache\\', - r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag', - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\', + r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch', + r'\\Software\\Microsoft\\Office\\[0-9\.]+\\', + r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\', + r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\', + r'\\Software\\Microsoft\\Shared Tools\\Panose\\', + r'\\Software\\Microsoft\\Tracing\\', + r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\', r'\\Software\\Microsoft\\Tracing\\powershell_RASMANCS\\', - r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\' + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\', + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\', + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\', + r'\\Usage\\SpellingAndGrammarFiles' } if list_in_string(noise_substrings, item, regex=True): return None From 67cecc89d0ea492cd7f77fd42b4f91a9814e339a Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 15 May 2018 12:59:55 +0200 Subject: [PATCH 29/34] threatanalyzer_import - minor generic noise removal --- .../import_mod/threatanalyzer_import.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py index da01b93..326de89 100755 --- a/misp_modules/modules/import_mod/threatanalyzer_import.py +++ b/misp_modules/modules/import_mod/threatanalyzer_import.py @@ -15,7 +15,7 @@ misperrors = {'error': 'Error'} userConfig = {} inputSource = ['file'] -moduleinfo = {'version': '0.6', 'author': 'Christophe Vandeplas', +moduleinfo = {'version': '0.7', 'author': 'Christophe Vandeplas', 'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files', 'module-type': ['import']} @@ -451,23 +451,24 @@ def cleanup_filepath(item): def cleanup_regkey(item): noise_substrings = { - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\', - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\', + r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith', r'\\CurrentVersion\\Explorer\\RecentDocs\\', r'\\CurrentVersion\\Explorer\\UserAssist\\', - r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith', - r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch', - r'\\Software\\Microsoft\\Office\\[0-9\.]+\\', - r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\', - r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\', - r'\\Usage\\SpellingAndGrammarFiles', - r'^HKLM\\Software\\Microsoft\\Tracing\\', + r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag', r'\\Software\\Classes\\CLSID\\', r'\\Software\\Classes\\Local Settings\\MuiCache\\', - r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag', - r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\', + r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch', + r'\\Software\\Microsoft\\Office\\[0-9\.]+\\', + r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\', + r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\', + r'\\Software\\Microsoft\\Shared Tools\\Panose\\', + r'\\Software\\Microsoft\\Tracing\\', + r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\', r'\\Software\\Microsoft\\Tracing\\powershell_RASMANCS\\', - r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\' + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\', + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\', + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\', + r'\\Usage\\SpellingAndGrammarFiles' } if list_in_string(noise_substrings, item, regex=True): return None From 0593dbb40807b01988b88902e8732e150538e6e2 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Wed, 16 May 2018 11:50:47 +0200 Subject: [PATCH 30/34] ta import - more filter for pollution --- .../import_mod/threatanalyzer_import.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py index 326de89..757f849 100755 --- a/misp_modules/modules/import_mod/threatanalyzer_import.py +++ b/misp_modules/modules/import_mod/threatanalyzer_import.py @@ -421,20 +421,22 @@ def cleanup_url(item): def cleanup_filepath(item): noise_substrings = { - 'C:\\Windows\\Prefetch\\', - '\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\', - '\\AppData\\Roaming\\Microsoft\\Office\\Recent\\', - 'C:\\ProgramData\\Microsoft\\OfficeSoftwareProtectionPlatform\\Cache\\cache.dat', - '\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.', - '\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\High\\', + '\\AppData\\Local\\GDIPFONTCACHEV1.DAT', '\\AppData\\Local\\Microsoft\\Internet Explorer\\DOMStore\\', - '\\AppData\\LocalLow\\Microsoft\\Internet Explorer\\Services\\search_', - '\\AppData\\Local\\Microsoft\\Windows\\History\\History.', - '\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\', - '\\AppData\\LocalLow\\Microsoft\\CryptnetUrlCache\\', + '\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\High\\', '\\AppData\\Local\\Microsoft\\Windows\\Caches\\', - '\\AppData\\Local\\Microsoft\\Windows\WebCache\\', '\\AppData\\Local\\Microsoft\\Windows\\Explorer\\thumbcache', + '\\AppData\\Local\\Microsoft\\Windows\\History\\History.', + '\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.', + '\\AppData\\Local\\Microsoft\\Windows\\WebCache\\', + '\\AppData\\Local\\Temp\\.*tmp$', + '\\AppData\\LocalLow\\Microsoft\\CryptnetUrlCache\\', + '\\AppData\\LocalLow\\Microsoft\\Internet Explorer\\Services\\search_', + '\\AppData\\Roaming\\Microsoft\\Office\\Recent\\', + '\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\', + '\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\', + 'C:\\ProgramData\\Microsoft\\OfficeSoftwareProtectionPlatform\\Cache\\cache.dat', + 'C:\\Windows\\Prefetch\\', '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\SharedDataEvents-journal', '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin', @@ -460,14 +462,16 @@ def cleanup_regkey(item): r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch', r'\\Software\\Microsoft\\Office\\[0-9\.]+\\', r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\', - r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\', + r'\\Software\\Microsoft\\OfficeSoftwareProtectionPlatform\\', r'\\Software\\Microsoft\\Shared Tools\\Panose\\', r'\\Software\\Microsoft\\Tracing\\', r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\', r'\\Software\\Microsoft\\Tracing\\powershell_RASMANCS\\', + r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Action Center\\', r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\', r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\', r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\', + r'\\System\\CurrentControlSet\\Services\\RdyBoost\\', r'\\Usage\\SpellingAndGrammarFiles' } if list_in_string(noise_substrings, item, regex=True): From c088b13f0374aeb586e021a2aa21b10358b9b3f1 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 17 May 2018 13:47:49 +0200 Subject: [PATCH 31/34] fix: Using userConfig to define the header instead of moduleconfig --- misp_modules/modules/import_mod/csvimport.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 543d67b..85b9c6b 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -3,11 +3,14 @@ import json, os, base64 import pymisp misperrors = {'error': 'Error'} -mispattributes = {'inputSource': ['file'], 'output': ['MISP attributes']} moduleinfo = {'version': '0.1', 'author': 'Christian Studer', 'description': 'Import Attributes from a csv file.', 'module-type': ['import']} -moduleconfig = ['header'] +moduleconfig = [] +inputSource = ['file'] +userConfig = {'header': { + 'type': 'String', + 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}} duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} @@ -121,7 +124,18 @@ def handler(q=False): return r def introspection(): - return mispattributes + modulesetup = {} + try: + userConfig + modulesetup['userConfig'] = userConfig + except NameError: + pass + try: + inputSource + modulesetup['inputSource'] = inputSource + except NameError: + pass + return modulesetup def version(): moduleinfo['config'] = moduleconfig From dba8bd8c5bb0b6cc9e24cd938421240059f5b9f8 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 17 May 2018 16:24:11 +0200 Subject: [PATCH 32/34] fix: Avoid trying to build attributes with not intended fields - Previously: if the header field is not an attribute type, then it was added as an attribute field. PyMISP then used to skip it if needed - Now: Those fields are discarded before they are put in an attribute --- misp_modules/modules/import_mod/csvimport.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 85b9c6b..7bea557 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -14,6 +14,7 @@ userConfig = {'header': { duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} +attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] class CsvParser(): def __init__(self, header): @@ -96,9 +97,12 @@ class CsvParser(): elif h in duplicatedFields['attrField']: # fields that should be considered as attribute fields head.append(duplicatedFields['attrField'].get(h)) - # otherwise, it is an attribute field - else: + # or, it could be an attribute field + elif h in attributesFields: head.append(h) + # otherwise, it is not defined + else: + head.append('') # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields return list2pop, misp, list(reversed(head)) From 1fb72f3c7a645a9674122d54135851a3d1c3997e Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 18 May 2018 11:33:53 +0200 Subject: [PATCH 33/34] add: Added user config to specify if there is a header in the csv to import --- misp_modules/modules/import_mod/csvimport.py | 32 ++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 7bea557..9342a9f 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -10,15 +10,21 @@ moduleconfig = [] inputSource = ['file'] userConfig = {'header': { 'type': 'String', - 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}} + 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}, + 'has_header':{ + 'type': 'Boolean', + 'message': 'Tick this box ONLY if there is a header line, NOT COMMENTED, in the file (which will be skipped atm).' + }} duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] class CsvParser(): - def __init__(self, header): + def __init__(self, header, has_header): self.header = header + self.fields_number = len(header) + self.has_header = has_header self.attributes = [] def parse_data(self, data): @@ -27,12 +33,12 @@ class CsvParser(): l = line.split('#')[0].strip() if '#' in line else line.strip() if l: return_data.append(l) - self.data = return_data + self.data = return_data[1:] if self.has_header else return_data # find which delimiter is used - self.delimiter, self.length = self.findDelimiter() + self.delimiter = self.find_delimiter() - def findDelimiter(self): - n = len(self.header) + def find_delimiter(self): + n = self.fields_number if n > 1: tmpData = [] for da in self.data: @@ -41,11 +47,11 @@ class CsvParser(): if da.count(d) == (n-1): tmp.append(d) if len(tmp) == 1 and tmp == tmpData: - return tmpData[0], n + return tmpData[0] else: tmpData = tmp else: - return None, 1 + return None def buildAttributes(self): # if there is only 1 field of data @@ -63,7 +69,7 @@ class CsvParser(): datamisp = [] datasplit = data.split(self.delimiter) # in case there is an empty line or an error - if len(datasplit) != self.length: + if len(datasplit) != self.fields_number: continue # pop from the line data that matches with a misp type, using the list of indexes for l in list2pop: @@ -118,9 +124,11 @@ def handler(q=False): if not request.get('config') and not request['config'].get('header'): misperrors['error'] = "Configuration error" return misperrors - config = request['config'].get('header').split(',') - config = [c.strip() for c in config] - csv_parser = CsvParser(config) + header = request['config'].get('header').split(',') + header = [c.strip() for c in header] + has_header = request['config'].get('has_header') + has_header = True if has_header == '1' else False + csv_parser = CsvParser(header, has_header) csv_parser.parse_data(data.split('\n')) # build the attributes csv_parser.buildAttributes() From 2b509a2fd3f7f7df769488def9014a683cef9477 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 18 May 2018 11:38:13 +0200 Subject: [PATCH 34/34] Updated delimiter finder function --- misp_modules/modules/import_mod/csvimport.py | 43 +++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 9342a9f..5ccf287 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -19,6 +19,7 @@ userConfig = {'header': { duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] +delimiters = [',', ';', '|', '/', '\t', ' '] class CsvParser(): def __init__(self, header, has_header): @@ -29,29 +30,31 @@ class CsvParser(): def parse_data(self, data): return_data = [] - for line in data: - l = line.split('#')[0].strip() if '#' in line else line.strip() - if l: - return_data.append(l) + if self.fields_number == 1: + for line in data: + l = line.split('#')[0].strip() + if l: + return_data.append(l) + self.delimiter = None + else: + self.delimiter_count = dict([(d, 0) for d in delimiters]) + for line in data: + l = line.split('#')[0].strip() + if l: + self.parse_delimiter(l) + return_data.append(l) + # find which delimiter is used + self.delimiter = self.find_delimiter() self.data = return_data[1:] if self.has_header else return_data - # find which delimiter is used - self.delimiter = self.find_delimiter() + + def parse_delimiter(self, line): + for d in delimiters: + if line.count(d) >= (self.fields_number - 1): + self.delimiter_count[d] += 1 def find_delimiter(self): - n = self.fields_number - if n > 1: - tmpData = [] - for da in self.data: - tmp = [] - for d in (';', '|', '/', ',', '\t', ' ',): - if da.count(d) == (n-1): - tmp.append(d) - if len(tmp) == 1 and tmp == tmpData: - return tmpData[0] - else: - tmpData = tmp - else: - return None + _, delimiter = max((n, v) for v, n in self.delimiter_count.items()) + return delimiter def buildAttributes(self): # if there is only 1 field of data