From b9d72bb043ca7f0a707a7a062e3a554fc51461b2 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 25 Jan 2018 15:44:08 +0100 Subject: [PATCH 1/6] First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) --- misp_modules/modules/import_mod/csvimport.py | 113 +++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 misp_modules/modules/import_mod/csvimport.py diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py new file mode 100644 index 0000000..fc5b4e7 --- /dev/null +++ b/misp_modules/modules/import_mod/csvimport.py @@ -0,0 +1,113 @@ +import json, os +import pymisp + +misperrors = {'error': 'Error'} +mispattributes = {'input': ['file'], 'output': ['MISP attributes']} +moduleinfo = {'version': '0.1', 'author': 'Christian Studer', + 'description': 'Import Attributes from a csv file.', + 'module-type': ['import']} +moduleconfig = ['header'] + +duplicatedFields = {'mispType': {'mispComment': 'comment'}, + 'attrField': {'eventComment': 'comment'}} + +def handler(q=False): + if q is False: + return False + request = json.loads(q) + if request.get('file'): + filename = request['file'] + else: + misperrors['error'] = "Unsupported attributes type" + return misperrors + if not request.get('config') and not request['config'].get('header'): + misperrors['error'] = "Configuration error" + return misperrors + config = request['config'].get('header') + header = [] + data = [] + with open(filename, 'r') as f: + for line in f: + # split comments from data + if line.startswith('#'): + header.append(line) + else: + data.append(line) + # find which delimiter is used + delimiter, length = findDelimiter(config, data) + # build the attributes + result = buildAttributes(config, data, delimiter, length) + r = {'results': [{'types': mispattributes['output'], 'values': result}]} + return r + +def findDelimiter(header, data): + n = len(header) + if n > 1: + for d in (';', ',', '|'): + if data[0].count(d) == (n-1): + return d, n + break + else: + return None, 1 + +def buildAttributes(header, dataValues, delimiter, length): + attributes = [] + # if there is only 1 field of data + if delimiter is None: + mispType = header[0] + for data in dataValues: + attributes.append({'type': mispType, 'value': data}) + else: + # split fields that should be recognized as misp attribute types from the others + list2pop, misp, head = findMispTypes(header) + # for each line of data + for data in dataValues: + datamisp = [] + datasplit = data.split(delimiter) + # in case there is an empty line or an error + if len(datasplit) != length: + continue + # pop from the line data that matches with a misp type, using the list of indexes + for l in list2pop: + datamisp.append(datasplit.pop(l).strip()) + # for each misp type, we create an attribute + for m, dm in zip(misp, datamisp): + attribute = {'type': m, 'value': dm} + for h, ds in zip(head, datasplit): + if h: + attribute[h] = ds.strip() + attributes.append(attribute) + return attributes + +def findMispTypes(header): + descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json') + with open(descFilename, 'r') as f: + MispTypes = json.loads(f.read())['result'].get('types') + list2pop = [] + misp = [] + head = [] + for h in reversed(header): + n = header.index(h) + # fields that are misp attribute types + if h in MispTypes: + list2pop.append(n) + misp.append(h) + # handle confusions between misp attribute types and attribute fields + elif h in duplicatedFields['mispType']: + # fields that should be considered as misp attribute types + list2pop.append(n) + misp.append(duplicatedFields['mispType'].get(h)) + elif h in duplicatedFields['attrField']: + # fields that should be considered as attribute fields + head.append(duplicatedFields['attrField'].get(h)) + else: + head.append(h) + # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields + return list2pop, misp, head + +def introspection(): + return mispattributes + +def version(): + moduleinfo['config'] = moduleconfig + return moduleinfo From 4d846f968f4a1e2671b64a18a4b6b1371b8f5e41 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 26 Jan 2018 17:11:01 +0100 Subject: [PATCH 2/6] Updated delimiter parsing & data reading functions --- misp_modules/modules/import_mod/csvimport.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index fc5b4e7..e5ff3b0 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -24,13 +24,17 @@ def handler(q=False): misperrors['error'] = "Configuration error" return misperrors config = request['config'].get('header') - header = [] + #header = [] data = [] with open(filename, 'r') as f: for line in f: # split comments from data - if line.startswith('#'): - header.append(line) + if '#' in line: + l = line.split('#')[0] + if l: + data.append(l) + #else: + #header.append(line) else: data.append(line) # find which delimiter is used @@ -43,10 +47,9 @@ def handler(q=False): def findDelimiter(header, data): n = len(header) if n > 1: - for d in (';', ',', '|'): + for d in (';', '|', '/', ','): if data[0].count(d) == (n-1): return d, n - break else: return None, 1 @@ -100,6 +103,7 @@ def findMispTypes(header): elif h in duplicatedFields['attrField']: # fields that should be considered as attribute fields head.append(duplicatedFields['attrField'].get(h)) + # otherwise, it is an attribute field else: head.append(h) # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields From 56cbd72b6526bf6b777cb62cea5f4be99b390463 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Sun, 28 Jan 2018 18:12:40 +0100 Subject: [PATCH 3/6] Fixed data treatment & other updates --- misp_modules/modules/import_mod/csvimport.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index e5ff3b0..4076902 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -47,7 +47,7 @@ def handler(q=False): def findDelimiter(header, data): n = len(header) if n > 1: - for d in (';', '|', '/', ','): + for d in (';', '|', '/', ',', ' '): if data[0].count(d) == (n-1): return d, n else: @@ -59,7 +59,7 @@ def buildAttributes(header, dataValues, delimiter, length): if delimiter is None: mispType = header[0] for data in dataValues: - attributes.append({'type': mispType, 'value': data}) + attributes.append({'type': mispType, 'value': data.strip()}) else: # split fields that should be recognized as misp attribute types from the others list2pop, misp, head = findMispTypes(header) @@ -107,7 +107,7 @@ def findMispTypes(header): else: head.append(h) # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields - return list2pop, misp, head + return list2pop, misp, list(reversed(head)) def introspection(): return mispattributes From 529d22cca80820690183a10016f03f38d5047a6c Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 29 Jan 2018 09:19:58 +0100 Subject: [PATCH 4/6] fix: skipping empty lines --- misp_modules/modules/import_mod/csvimport.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 4076902..50e9837 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -59,7 +59,9 @@ def buildAttributes(header, dataValues, delimiter, length): if delimiter is None: mispType = header[0] for data in dataValues: - attributes.append({'type': mispType, 'value': data.strip()}) + d = data.strip() + if d: + attributes.append({'type': mispType, 'value': d}) else: # split fields that should be recognized as misp attribute types from the others list2pop, misp, head = findMispTypes(header) From b2ec186ccb14a48b3e4370925bc05fb439081d06 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Mon, 29 Jan 2018 17:04:32 +0100 Subject: [PATCH 5/6] Updated delimiter finder method --- misp_modules/modules/import_mod/csvimport.py | 23 ++++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 50e9837..7e0107f 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -30,13 +30,11 @@ def handler(q=False): for line in f: # split comments from data if '#' in line: - l = line.split('#')[0] - if l: - data.append(l) - #else: - #header.append(line) + l = line.split('#')[0].strip() else: - data.append(line) + l = line.strip() + if l: + data.append(l) # find which delimiter is used delimiter, length = findDelimiter(config, data) # build the attributes @@ -47,9 +45,16 @@ def handler(q=False): def findDelimiter(header, data): n = len(header) if n > 1: - for d in (';', '|', '/', ',', ' '): - if data[0].count(d) == (n-1): - return d, n + tmpData = [] + for da in data: + tmp = [] + for d in (';', '|', '/', ',', '\t', ' ',): + if da.count(d) == (n-1): + tmp.append(d) + if len(tmp) == 1 and tmp == tmpData: + return tmpData[0], n + else: + tmpData = tmp else: return None, 1 From 71c00954d08862f58de8d218fdf4407fb8104f6d Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Tue, 30 Jan 2018 11:20:28 +0100 Subject: [PATCH 6/6] fix: Solved reading problems for some files --- misp_modules/modules/import_mod/csvimport.py | 22 +++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 7e0107f..dc67eec 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import json, os import pymisp @@ -25,8 +26,20 @@ def handler(q=False): return misperrors config = request['config'].get('header') #header = [] + try: + data = readFile(filename, 'utf-8') + except: + data = readFile(filename, 'iso-8859-1') + # find which delimiter is used + delimiter, length = findDelimiter(config, data) + # build the attributes + result = buildAttributes(config, data, delimiter, length) + r = {'results': [{'types': mispattributes['output'], 'values': result}]} + return r + +def readFile(filename, encoding): data = [] - with open(filename, 'r') as f: + with open(filename, 'r', encoding=encoding) as f: for line in f: # split comments from data if '#' in line: @@ -35,12 +48,7 @@ def handler(q=False): l = line.strip() if l: data.append(l) - # find which delimiter is used - delimiter, length = findDelimiter(config, data) - # build the attributes - result = buildAttributes(config, data, delimiter, length) - r = {'results': [{'types': mispattributes['output'], 'values': result}]} - return r + return data def findDelimiter(header, data): n = len(header)