Merge pull request #169 from chrisr3d/master

Updated GoAML import including Object References
pull/170/head
Alexandre Dulaunoy 2018-03-05 21:35:18 +01:00 committed by GitHub
commit 297343b7fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 105 additions and 90 deletions

View File

@ -10,7 +10,94 @@ moduleinfo = {'version': '0.1', 'author': 'Christian Studer',
moduleconfig = ['header'] moduleconfig = ['header']
duplicatedFields = {'mispType': {'mispComment': 'comment'}, duplicatedFields = {'mispType': {'mispComment': 'comment'},
'attrField': {'eventComment': 'comment'}} 'attrField': {'attrComment': 'comment'}}
class CsvParser():
    """Turn delimiter-separated text into a list of attribute dictionaries.

    ``header`` is the list of column names. Columns whose name is a MISP
    attribute type each produce one attribute per data line; the remaining
    named columns are copied onto every attribute built from that line.
    Results accumulate in ``self.attributes``.
    """

    def __init__(self, header):
        """Store the column names and start with an empty result list."""
        self.header = header
        self.attributes = []

    def parse_data(self, data):
        """Clean the raw lines and detect the delimiter they use.

        Everything from the first ``#`` on a line is dropped, surrounding
        whitespace is stripped and empty lines are discarded. The cleaned
        lines are stored in ``self.data``; the detected delimiter and the
        expected number of fields in ``self.delimiter`` / ``self.length``.

        Raises:
            ValueError: propagated from ``findDelimiter`` when several
                columns are expected but no delimiter can be determined.
        """
        return_data = []
        for line in data:
            # split() returns the whole line when there is no '#'
            content = line.split('#')[0].strip()
            if content:
                return_data.append(content)
        self.data = return_data
        # find which delimiter is used
        self.delimiter, self.length = self.findDelimiter()

    def findDelimiter(self):
        """Return ``(delimiter, number_of_fields)`` for ``self.data``.

        With a single header field there is nothing to split, so
        ``(None, 1)`` is returned. Otherwise a candidate delimiter is one
        that occurs exactly ``len(header) - 1`` times in a line. The first
        pair of consecutive lines agreeing on exactly one candidate decides.
        If no such pair exists (for instance a single data line), the first
        line with exactly one candidate is used instead of implicitly
        returning ``None``.

        Raises:
            ValueError: if no line yields an unambiguous delimiter.
        """
        n = len(self.header)
        if n <= 1:
            return None, 1
        previous = []
        fallback = None
        for line in self.data:
            candidates = [d for d in (';', '|', '/', ',', '\t', ' ')
                          if line.count(d) == (n - 1)]
            if len(candidates) == 1:
                if candidates == previous:
                    return candidates[0], n
                if fallback is None:
                    # remembered in case no two consecutive lines agree
                    fallback = candidates[0]
            previous = candidates
        if fallback is None:
            raise ValueError(
                'unable to determine the delimiter for {} header fields'.format(n))
        return fallback, n

    def buildAttributes(self):
        """Fill ``self.attributes`` from the lines stored by ``parse_data``."""
        # if there is only 1 field of data
        if self.delimiter is None:
            mispType = self.header[0]
            for data in self.data:
                d = data.strip()
                if d:
                    self.attributes.append({'types': mispType, 'values': d})
            return
        # split fields that should be recognized as misp attribute types from the others
        list2pop, misp, head = self.findMispTypes()
        # for each line of data
        for data in self.data:
            datasplit = data.split(self.delimiter)
            # in case there is an empty line or an error
            if len(datasplit) != self.length:
                continue
            # pop from the line the data that matches a misp type; the indexes
            # are in descending order, so earlier pops do not shift later ones
            datamisp = [datasplit.pop(index).strip() for index in list2pop]
            # for each misp type, we create an attribute
            for m, dm in zip(misp, datamisp):
                attribute = {'types': m, 'values': dm}
                for h, ds in zip(head, datasplit):
                    # columns with an empty name are ignored
                    if h:
                        attribute[h] = ds.strip()
                self.attributes.append(attribute)

    def findMispTypes(self):
        """Separate the header into MISP-type columns and plain field columns.

        The known types are read from the ``result`` -> ``types`` entry of
        pymisp's ``data/describeTypes.json``.

        Returns:
            A tuple ``(list2pop, misp, head)``: the indexes of the type
            columns in descending order, the matching type names in the same
            order, and the names of the remaining columns in header order.
        """
        descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
        with open(descFilename, 'r') as f:
            MispTypes = json.loads(f.read())['result'].get('types')
        list2pop = []
        misp = []
        head = []
        # walk the header backwards by position: header.index(h) would return
        # the first occurrence and pick the wrong column for repeated names
        for n in range(len(self.header) - 1, -1, -1):
            h = self.header[n]
            # fields that are misp attribute types
            if h in MispTypes:
                list2pop.append(n)
                misp.append(h)
            # handle confusions between misp attribute types and attribute fields
            elif h in duplicatedFields['mispType']:
                # fields that should be considered as misp attribute types
                list2pop.append(n)
                misp.append(duplicatedFields['mispType'].get(h))
            elif h in duplicatedFields['attrField']:
                # fields that should be considered as attribute fields
                head.append(duplicatedFields['attrField'].get(h))
            # otherwise, it is an attribute field
            else:
                head.append(h)
        # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields
        return list2pop, misp, list(reversed(head))
def handler(q=False): def handler(q=False):
if q is False: if q is False:
@ -26,96 +113,13 @@ def handler(q=False):
return misperrors return misperrors
config = request['config'].get('header').split(',') config = request['config'].get('header').split(',')
config = [c.strip() for c in config] config = [c.strip() for c in config]
data = parse_data(data.split('\n')) csv_parser = CsvParser(config)
# find which delimiter is used csv_parser.parse_data(data.split('\n'))
delimiter, length = findDelimiter(config, data)
# build the attributes # build the attributes
result = buildAttributes(config, data, delimiter, length) csv_parser.buildAttributes()
r = {'results': result} r = {'results': csv_parser.attributes}
return r return r
def parse_data(data):
    """Return the meaningful content of each line in ``data``.

    Anything from the first ``#`` onwards is treated as a comment and
    dropped, the rest is stripped of surrounding whitespace, and lines that
    end up empty are left out.
    """
    stripped = (raw.partition('#')[0].strip() for raw in data)
    return [entry for entry in stripped if entry]
def findDelimiter(header, data):
    """Return ``(delimiter, number_of_fields)`` for the given lines.

    With a single header field there is nothing to split, so ``(None, 1)``
    is returned. Otherwise a candidate delimiter is one that occurs exactly
    ``len(header) - 1`` times in a line. The first pair of consecutive lines
    agreeing on exactly one candidate decides. If no such pair exists (for
    instance a single data line), the first line with exactly one candidate
    is used instead of implicitly returning ``None``.

    Raises:
        ValueError: if no line yields an unambiguous delimiter.
    """
    n = len(header)
    if n <= 1:
        return None, 1
    previous = []
    fallback = None
    for line in data:
        candidates = [d for d in (';', '|', '/', ',', '\t', ' ')
                      if line.count(d) == (n - 1)]
        if len(candidates) == 1:
            if candidates == previous:
                return candidates[0], n
            if fallback is None:
                # remembered in case no two consecutive lines agree
                fallback = candidates[0]
        previous = candidates
    if fallback is None:
        raise ValueError(
            'unable to determine the delimiter for {} header fields'.format(n))
    return fallback, n
def buildAttributes(header, dataValues, delimiter, length):
    """Build the list of attribute dictionaries for the given lines.

    When ``delimiter`` is None every non-blank line becomes one attribute
    whose type is the first header entry. Otherwise each line is split on
    ``delimiter``; lines that do not yield ``length`` fields are skipped,
    and one attribute is created per MISP-type column, carrying the values
    of the other named columns of that line.
    """
    if delimiter is None:
        # only one field of data per line
        single_type = header[0]
        values = (row.strip() for row in dataValues)
        return [{'types': single_type, 'values': value} for value in values if value]

    # separate the columns holding misp attribute types from the other fields
    pop_indexes, type_names, field_names = findMispTypes(header)
    built = []
    for row in dataValues:
        cells = row.split(delimiter)
        if len(cells) != length:
            # empty or malformed line
            continue
        # take the type columns out of the row, following the index list
        type_values = [cells.pop(index).strip() for index in pop_indexes]
        # what is left lines up with the remaining field names; unnamed columns are ignored
        extras = {name: cell.strip() for name, cell in zip(field_names, cells) if name}
        for type_name, value in zip(type_names, type_values):
            entry = {'types': type_name, 'values': value}
            entry.update(extras)
            built.append(entry)
    return built
def findMispTypes(header):
    """Separate the header into MISP-type columns and plain field columns.

    The known types are read from the ``result`` -> ``types`` entry of
    pymisp's ``data/describeTypes.json``.

    Returns:
        A tuple ``(list2pop, misp, head)``: the indexes of the type columns
        in descending order, the matching type names in the same order, and
        the names of the remaining columns in header order.
    """
    descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
    with open(descFilename, 'r') as f:
        MispTypes = json.loads(f.read())['result'].get('types')
    list2pop = []
    misp = []
    head = []
    # walk the header backwards by position: header.index(h) would return the
    # first occurrence and pick the wrong column for repeated names
    for n in range(len(header) - 1, -1, -1):
        h = header[n]
        # fields that are misp attribute types
        if h in MispTypes:
            list2pop.append(n)
            misp.append(h)
        # handle confusions between misp attribute types and attribute fields
        elif h in duplicatedFields['mispType']:
            # fields that should be considered as misp attribute types
            list2pop.append(n)
            misp.append(duplicatedFields['mispType'].get(h))
        elif h in duplicatedFields['attrField']:
            # fields that should be considered as attribute fields
            head.append(duplicatedFields['attrField'].get(h))
        # otherwise, it is an attribute field
        else:
            head.append(h)
    # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields
    return list2pop, misp, list(reversed(head))
def introspection(): def introspection():
return mispattributes return mispattributes

View File

@ -71,6 +71,7 @@ goAMLmapping = {'from_account': t_account_mapping, 'to_account': t_account_mappi
'authorized': 'authorized', 'transaction_description': 'text'}} 'authorized': 'authorized', 'transaction_description': 'text'}}
nodes_to_ignore = ['addresses', 'signatory'] nodes_to_ignore = ['addresses', 'signatory']
relationship_to_keep = ['signatory', 't_from', 't_from_my_client', 't_to', 't_to_my_client', 'address']
class GoAmlParser(): class GoAmlParser():
def __init__(self): def __init__(self):
@ -92,8 +93,10 @@ class GoAmlParser():
if element is not None: if element is not None:
self.itterate(element, element.tag) self.itterate(element, element.tag)
def itterate(self, tree, aml_type): def itterate(self, tree, aml_type, referencing_uuid=None, relationship_type=None):
objects = goAMLobjects[aml_type] objects = goAMLobjects[aml_type]
referenced_uuid = referencing_uuid
rel = relationship_type
if aml_type not in nodes_to_ignore: if aml_type not in nodes_to_ignore:
try: try:
mapping = goAMLmapping[aml_type] mapping = goAMLmapping[aml_type]
@ -110,12 +113,20 @@ class GoAmlParser():
if element is not None: if element is not None:
self.fill_transaction(element, element.tag, misp_object) self.fill_transaction(element, element.tag, misp_object)
self.misp_event.add_object(misp_object) self.misp_event.add_object(misp_object)
last_object = self.misp_event.objects[-1]
referenced_uuid = last_object.uuid
if referencing_uuid and relationship_type:
referencing_object = self.misp_event.get_object_by_uuid(referencing_uuid)
referencing_object.add_reference(referenced_uuid, rel, None, **last_object)
except KeyError: except KeyError:
pass pass
for node in objects['nodes']: for node in objects['nodes']:
element = tree.find(node) element = tree.find(node)
if element is not None: if element is not None:
self.itterate(element, element.tag) tag = element.tag
if tag in relationship_to_keep:
rel = tag[2:] if tag.startswith('t_') else tag
self.itterate(element, element.tag, referencing_uuid=referenced_uuid, relationship_type=rel)
@staticmethod @staticmethod
def fill_transaction(element, tag, misp_object): def fill_transaction(element, tag, misp_object):