Merge pull request #169 from chrisr3d/master

Updated GoAML import including Object References
2018-03-05 21:35:18 +01:00 · 2018-03-05 21:35:18 +01:00 · 297343b7fc
parent b1dd21fd06 d885286792
commit 297343b7fc
2 changed files with 105 additions and 90 deletions
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@ -10,7 +10,94 @@ moduleinfo = {'version': '0.1', 'author': 'Christian Studer',
 moduleconfig = ['header']
 duplicatedFields = {'mispType': {'mispComment': 'comment'},
-                    'attrField': {'eventComment': 'comment'}}
+                    'attrField': {'attrComment': 'comment'}}
 class CsvParser():
    """Turn delimiter-separated text lines into attribute dictionaries.

    ``header`` is the list of column names. Columns whose name is a MISP
    attribute type each produce one attribute per data line; the remaining
    columns are copied onto every attribute built from that line.
    Results accumulate in ``self.attributes``.
    """

    # candidate delimiters, tested in this order
    _delimiters = (';', '|', '/', ',', '\t', '    ')

    def __init__(self, header):
        self.header = header
        self.attributes = []
        # filled in by parse_data()
        self.data = []
        self.delimiter = None
        self.length = 1

    def parse_data(self, data):
        """Store the non-empty, comment-free lines of ``data`` and detect the delimiter.

        Everything after the first ``#`` on a line is dropped, the rest is
        stripped, and empty results are skipped.

        Raises:
            ValueError: propagated from findDelimiter() when no delimiter fits.
        """
        cleaned = []
        for line in data:
            content = line.split('#')[0].strip()
            if content:
                cleaned.append(content)
        self.data = cleaned
        # find which delimiter is used
        self.delimiter, self.length = self.findDelimiter()

    def findDelimiter(self):
        """Return ``(delimiter, number_of_fields)`` for the stored data.

        With a single header field there is nothing to split: ``(None, 1)``.
        Otherwise a delimiter is accepted as soon as two consecutive lines
        both have it as their only candidate (a candidate occurs exactly
        ``len(header) - 1`` times in the line). If that never happens, for
        instance when there is only one data line, the first line with a
        single unambiguous candidate decides.

        Raises:
            ValueError: if no line yields an unambiguous delimiter.
        """
        n = len(self.header)
        if n <= 1:
            return None, 1
        previous = []
        fallback = None
        for line in self.data:
            candidates = [d for d in self._delimiters if line.count(d) == (n - 1)]
            if len(candidates) == 1:
                if candidates == previous:
                    return candidates[0], n
                if fallback is None:
                    fallback = candidates[0]
            previous = candidates
        if fallback is None:
            raise ValueError('Unable to determine the delimiter used in the data')
        return fallback, n

    def buildAttributes(self):
        """Append one attribute dictionary per recognised value to ``self.attributes``."""
        # if there is only 1 field of data
        if self.delimiter is None:
            mispType = self.header[0]
            for data in self.data:
                d = data.strip()
                if d:
                    self.attributes.append({'types': mispType, 'values': d})
        else:
            # split fields that should be recognized as misp attribute types from the others
            list2pop, misp, head = self.findMispTypes()
            # for each line of data
            for data in self.data:
                datamisp = []
                datasplit = data.split(self.delimiter)
                # in case there is an empty line or an error
                if len(datasplit) != self.length:
                    continue
                # pop from the line data that matches with a misp type, using the list of indexes
                # (indexes are in descending order, so earlier pops do not shift later ones)
                for index in list2pop:
                    datamisp.append(datasplit.pop(index).strip())
                # for each misp type, we create an attribute
                for m, dm in zip(misp, datamisp):
                    attribute = {'types': m, 'values': dm}
                    for h, ds in zip(head, datasplit):
                        if h:
                            attribute[h] = ds.strip()
                    self.attributes.append(attribute)

    def findMispTypes(self):
        """Classify the header columns.

        Returns:
            A tuple ``(list2pop, misp, head)``: the column indexes holding
            MISP types (descending), the matching type names (same order),
            and the names of the remaining columns in their original order.
        """
        descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
        with open(descFilename, 'r') as f:
            MispTypes = json.load(f)['result'].get('types')
        list2pop = []
        misp = []
        head = []
        # walk the columns from last to first, keeping each column's own
        # position so that repeated column names get distinct indexes
        for n, h in reversed(list(enumerate(self.header))):
            # fields that are misp attribute types
            if h in MispTypes:
                list2pop.append(n)
                misp.append(h)
            # handle confusions between misp attribute types and attribute fields
            elif h in duplicatedFields['mispType']:
                # fields that should be considered as misp attribute types
                list2pop.append(n)
                misp.append(duplicatedFields['mispType'].get(h))
            elif h in duplicatedFields['attrField']:
                # fields that should be considered as attribute fields
                head.append(duplicatedFields['attrField'].get(h))
            # otherwise, it is an attribute field
            else:
                head.append(h)
        # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields
        return list2pop, misp, list(reversed(head))
 def handler(q=False):
    if q is False:
@ -26,96 +113,13 @@ def handler(q=False):
        return misperrors
    config = request['config'].get('header').split(',')
    config = [c.strip() for c in config]
-    data = parse_data(data.split('\n'))
+    csv_parser = CsvParser(config)
-    # find which delimiter is used
+    csv_parser.parse_data(data.split('\n'))
    delimiter, length = findDelimiter(config, data)
    # build the attributes
-    result = buildAttributes(config, data, delimiter, length)
+    csv_parser.buildAttributes()
-    r = {'results': result}
+    r = {'results': csv_parser.attributes}
    return r
 def parse_data(data):
    """Return the meaningful content of each line in ``data``.

    The part of a line that follows the first ``#`` is discarded, the
    remainder is stripped, and lines left empty are omitted.
    """
    stripped = (line.split('#')[0].strip() for line in data)
    return [text for text in stripped if text]
 def findDelimiter(header, data):
    """Return ``(delimiter, number_of_fields)`` for ``data`` given ``header``.

    With a single header field there is nothing to split: ``(None, 1)``.
    Otherwise a delimiter is accepted as soon as two consecutive lines both
    have it as their only candidate (a candidate occurs exactly
    ``len(header) - 1`` times in the line). If that never happens, for
    instance when there is only one data line, the first line with a single
    unambiguous candidate decides, instead of implicitly returning ``None``
    and breaking the caller's tuple unpacking.

    Raises:
        ValueError: if no line yields an unambiguous delimiter.
    """
    n = len(header)
    if n <= 1:
        return None, 1
    previous = []
    fallback = None
    for line in data:
        candidates = [d for d in (';', '|', '/', ',', '\t', '    ',) if line.count(d) == (n - 1)]
        if len(candidates) == 1:
            if candidates == previous:
                return candidates[0], n
            if fallback is None:
                fallback = candidates[0]
        previous = candidates
    if fallback is None:
        raise ValueError('Unable to determine the delimiter used in the data')
    return fallback, n
 def buildAttributes(header, dataValues, delimiter, length):
    """Build the list of attribute dictionaries for the given data lines.

    When ``delimiter`` is None every non-blank line becomes one attribute
    whose type is the first header entry. Otherwise each line is split on
    ``delimiter``; lines that do not yield ``length`` fields are ignored,
    and one attribute is created per MISP-type column, carrying the other
    named columns as extra keys.
    """
    # single field of data: the whole line is the value
    if delimiter is None:
        mispType = header[0]
        trimmed = (line.strip() for line in dataValues)
        return [{'types': mispType, 'values': value} for value in trimmed if value]

    attributes = []
    # separate the columns recognised as misp attribute types from the others
    list2pop, misp, head = findMispTypes(header)
    for row in dataValues:
        fields = row.split(delimiter)
        # empty or malformed line
        if len(fields) != length:
            continue
        # extract the values of the misp type columns, using the list of indexes
        typed_values = [fields.pop(index).strip() for index in list2pop]
        # whatever remains belongs to the plain attribute fields
        extras = {name: value.strip() for name, value in zip(head, fields) if name}
        for misp_type, typed_value in zip(misp, typed_values):
            attribute = {'types': misp_type, 'values': typed_value}
            attribute.update(extras)
            attributes.append(attribute)
    return attributes
 def findMispTypes(header):
    """Classify the columns named in ``header``.

    The known MISP types are read from the ``data/describeTypes.json`` file
    located in the pymisp package directory.

    Returns:
        A tuple ``(list2pop, misp, head)``: the column indexes holding MISP
        types (descending), the matching type names (same order), and the
        names of the remaining columns in their original order.
    """
    descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
    with open(descFilename, 'r') as f:
        MispTypes = json.load(f)['result'].get('types')
    list2pop = []
    misp = []
    head = []
    # walk the columns from last to first, keeping each column's own position:
    # header.index(h) would return the first occurrence for a repeated name
    for n, h in reversed(list(enumerate(header))):
        # fields that are misp attribute types
        if h in MispTypes:
            list2pop.append(n)
            misp.append(h)
        # handle confusions between misp attribute types and attribute fields
        elif h in duplicatedFields['mispType']:
            # fields that should be considered as misp attribute types
            list2pop.append(n)
            misp.append(duplicatedFields['mispType'].get(h))
        elif h in duplicatedFields['attrField']:
            # fields that should be considered as attribute fields
            head.append(duplicatedFields['attrField'].get(h))
        # otherwise, it is an attribute field
        else:
            head.append(h)
    # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields
    return list2pop, misp, list(reversed(head))
 def introspection():
    """Return the module-level ``mispattributes`` object (defined outside this excerpt)."""
    return mispattributes
--- a/misp_modules/modules/import_mod/goamlimport.py
+++ b/misp_modules/modules/import_mod/goamlimport.py
@ -71,6 +71,7 @@ goAMLmapping = {'from_account': t_account_mapping, 'to_account': t_account_mappi
                                'authorized': 'authorized', 'transaction_description': 'text'}}
 nodes_to_ignore = ['addresses', 'signatory']
 relationship_to_keep = ['signatory', 't_from', 't_from_my_client', 't_to', 't_to_my_client', 'address']
 class GoAmlParser():
    def __init__(self):
@ -92,8 +93,10 @@ class GoAmlParser():
            if element is not None:
                self.itterate(element, element.tag)
-    def itterate(self, tree, aml_type):
+    def itterate(self, tree, aml_type, referencing_uuid=None, relationship_type=None):
        objects = goAMLobjects[aml_type]
        referenced_uuid = referencing_uuid
        rel = relationship_type
        if aml_type not in nodes_to_ignore:
            try:
                mapping = goAMLmapping[aml_type]
@ -110,12 +113,20 @@ class GoAmlParser():
                        if element is not None:
                            self.fill_transaction(element, element.tag, misp_object)
                self.misp_event.add_object(misp_object)
                last_object = self.misp_event.objects[-1]
                referenced_uuid = last_object.uuid
                if referencing_uuid and relationship_type:
                    referencing_object = self.misp_event.get_object_by_uuid(referencing_uuid)
                    referencing_object.add_reference(referenced_uuid, rel, None, **last_object)
            except KeyError:
                pass
        for node in objects['nodes']:
            element = tree.find(node)
            if element is not None:
-                self.itterate(element, element.tag)
+                tag = element.tag
                if tag in relationship_to_keep:
                    rel = tag[2:] if tag.startswith('t_') else tag
                self.itterate(element, element.tag, referencing_uuid=referenced_uuid, relationship_type=rel)
    @staticmethod
    def fill_transaction(element, tag, misp_object):