From dba8bd8c5bb0b6cc9e24cd938421240059f5b9f8 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Thu, 17 May 2018 16:24:11 +0200 Subject: [PATCH 1/3] fix: Avoid trying to build attributes with not intended fields - Previously: if the header field is not an attribute type, then it was added as an attribute field. PyMISP then used to skip it if needed - Now: Those fields are discarded before they are put in an attribute --- misp_modules/modules/import_mod/csvimport.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 85b9c6b..7bea557 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -14,6 +14,7 @@ userConfig = {'header': { duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} +attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] class CsvParser(): def __init__(self, header): @@ -96,9 +97,12 @@ class CsvParser(): elif h in duplicatedFields['attrField']: # fields that should be considered as attribute fields head.append(duplicatedFields['attrField'].get(h)) - # otherwise, it is an attribute field - else: + # or, it could be an attribute field + elif h in attributesFields: head.append(h) + # otherwise, it is not defined + else: + head.append('') # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields return list2pop, misp, list(reversed(head)) From 1fb72f3c7a645a9674122d54135851a3d1c3997e Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 18 May 2018 11:33:53 +0200 Subject: [PATCH 2/3] add: Added user config to specify if there is a header in the csv to import --- misp_modules/modules/import_mod/csvimport.py | 32 ++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 7bea557..9342a9f 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -10,15 +10,21 @@ moduleconfig = [] inputSource = ['file'] userConfig = {'header': { 'type': 'String', - 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}} + 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}, + 'has_header':{ + 'type': 'Boolean', + 'message': 'Tick this box ONLY if there is a header line, NOT COMMENTED, in the file (which will be skipped atm).' + }} duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] class CsvParser(): - def __init__(self, header): + def __init__(self, header, has_header): self.header = header + self.fields_number = len(header) + self.has_header = has_header self.attributes = [] def parse_data(self, data): @@ -27,12 +33,12 @@ class CsvParser(): l = line.split('#')[0].strip() if '#' in line else line.strip() if l: return_data.append(l) - self.data = return_data + self.data = return_data[1:] if self.has_header else return_data # find which delimiter is used - self.delimiter, self.length = self.findDelimiter() + self.delimiter = self.find_delimiter() - def findDelimiter(self): - n = len(self.header) + def find_delimiter(self): + n = self.fields_number if n > 1: tmpData = [] for da in self.data: @@ -41,11 +47,11 @@ class CsvParser(): if da.count(d) == (n-1): tmp.append(d) if len(tmp) == 1 and tmp == tmpData: - return tmpData[0], n + return tmpData[0] else: tmpData = tmp else: - return None, 1 + return None def buildAttributes(self): # if there is only 1 field of data @@ -63,7 +69,7 @@ class CsvParser(): datamisp = [] datasplit = data.split(self.delimiter) # in case there is an empty line or an error - if len(datasplit) != self.length: + if len(datasplit) != self.fields_number: continue # pop from the line data that matches with a misp type, using the list of indexes for l in list2pop: @@ -118,9 +124,11 @@ def handler(q=False): if not request.get('config') and not request['config'].get('header'): misperrors['error'] = "Configuration error" return misperrors - config = request['config'].get('header').split(',') - config = [c.strip() for c in config] - csv_parser = CsvParser(config) + header = request['config'].get('header').split(',') + header = [c.strip() for c in header] + has_header = request['config'].get('has_header') + has_header = True if has_header == '1' else False + csv_parser = CsvParser(header, has_header) csv_parser.parse_data(data.split('\n')) # build the attributes csv_parser.buildAttributes() From 2b509a2fd3f7f7df769488def9014a683cef9477 Mon Sep 17 00:00:00 2001 From: chrisr3d Date: Fri, 18 May 2018 11:38:13 +0200 Subject: [PATCH 3/3] Updated delimiter finder function --- misp_modules/modules/import_mod/csvimport.py | 43 +++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 9342a9f..5ccf287 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -19,6 +19,7 @@ userConfig = {'header': { duplicatedFields = {'mispType': {'mispComment': 'comment'}, 'attrField': {'attrComment': 'comment'}} attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution'] +delimiters = [',', ';', '|', '/', '\t', ' '] class CsvParser(): def __init__(self, header, has_header): @@ -29,29 +30,31 @@ class CsvParser(): def parse_data(self, data): return_data = [] - for line in data: - l = line.split('#')[0].strip() if '#' in line else line.strip() - if l: - return_data.append(l) + if self.fields_number == 1: + for line in data: + l = line.split('#')[0].strip() + if l: + return_data.append(l) + self.delimiter = None + else: + self.delimiter_count = dict([(d, 0) for d in delimiters]) + for line in data: + l = line.split('#')[0].strip() + if l: + self.parse_delimiter(l) + return_data.append(l) + # find which delimiter is used + self.delimiter = self.find_delimiter() self.data = return_data[1:] if self.has_header else return_data - # find which delimiter is used - self.delimiter = self.find_delimiter() + + def parse_delimiter(self, line): + for d in delimiters: + if line.count(d) >= (self.fields_number - 1): + self.delimiter_count[d] += 1 def find_delimiter(self): - n = self.fields_number - if n > 1: - tmpData = [] - for da in self.data: - tmp = [] - for d in (';', '|', '/', ',', '\t', ' ',): - if da.count(d) == (n-1): - tmp.append(d) - if len(tmp) == 1 and tmp == tmpData: - return tmpData[0] - else: - tmpData = tmp - else: - return None + _, delimiter = max((n, v) for v, n in self.delimiter_count.items()) + return delimiter def buildAttributes(self): # if there is only 1 field of data