diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 9342a9f..5ccf287 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -19,6 +19,7 @@ userConfig = {'header': {
 duplicatedFields = {'mispType': {'mispComment': 'comment'},
                     'attrField': {'attrComment': 'comment'}}
 attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']
+delimiters = [',', ';', '|', '/', '\t', ' ']
 
 class CsvParser():
     def __init__(self, header, has_header):
@@ -29,29 +30,31 @@ class CsvParser():
 
     def parse_data(self, data):
         return_data = []
-        for line in data:
-            l = line.split('#')[0].strip() if '#' in line else line.strip()
-            if l:
-                return_data.append(l)
+        if self.fields_number == 1:
+            for line in data:
+                l = line.split('#')[0].strip()
+                if l:
+                    return_data.append(l)
+            self.delimiter = None
+        else:
+            self.delimiter_count = dict([(d, 0) for d in delimiters])
+            for line in data:
+                l = line.split('#')[0].strip()
+                if l:
+                    self.parse_delimiter(l)
+                    return_data.append(l)
+            # find which delimiter is used
+            self.delimiter = self.find_delimiter()
         self.data = return_data[1:] if self.has_header else return_data
-        # find which delimiter is used
-        self.delimiter = self.find_delimiter()
+
+    def parse_delimiter(self, line):
+        for d in delimiters:
+            if line.count(d) >= (self.fields_number - 1):
+                self.delimiter_count[d] += 1
 
     def find_delimiter(self):
-        n = self.fields_number
-        if n > 1:
-            tmpData = []
-            for da in self.data:
-                tmp = []
-                for d in (';', '|', '/', ',', '\t', ' ',):
-                    if da.count(d) == (n-1):
-                        tmp.append(d)
-                if len(tmp) == 1 and tmp == tmpData:
-                    return tmpData[0]
-                else:
-                    tmpData = tmp
-        else:
-            return None
+        _, delimiter = max((n, v) for v, n in self.delimiter_count.items())
+        return delimiter
 
     def buildAttributes(self):
         # if there is only 1 field of data