mirror of https://github.com/MISP/misp-modules
Updated delimiter finder function
parent
1fb72f3c7a
commit
2b509a2fd3
|
@@ -19,6 +19,7 @@ userConfig = {'header': {
|
|||
duplicatedFields = {'mispType': {'mispComment': 'comment'},
|
||||
'attrField': {'attrComment': 'comment'}}
|
||||
attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']
|
||||
delimiters = [',', ';', '|', '/', '\t', ' ']
|
||||
|
||||
class CsvParser():
|
||||
def __init__(self, header, has_header):
|
||||
|
@@ -29,29 +30,31 @@ class CsvParser():
|
|||
|
||||
def parse_data(self, data):
|
||||
return_data = []
|
||||
if self.fields_number == 1:
|
||||
for line in data:
|
||||
l = line.split('#')[0].strip() if '#' in line else line.strip()
|
||||
l = line.split('#')[0].strip()
|
||||
if l:
|
||||
return_data.append(l)
|
||||
self.data = return_data[1:] if self.has_header else return_data
|
||||
self.delimiter = None
|
||||
else:
|
||||
self.delimiter_count = dict([(d, 0) for d in delimiters])
|
||||
for line in data:
|
||||
l = line.split('#')[0].strip()
|
||||
if l:
|
||||
self.parse_delimiter(l)
|
||||
return_data.append(l)
|
||||
# find which delimiter is used
|
||||
self.delimiter = self.find_delimiter()
|
||||
self.data = return_data[1:] if self.has_header else return_data
|
||||
|
||||
def parse_delimiter(self, line):
|
||||
for d in delimiters:
|
||||
if line.count(d) >= (self.fields_number - 1):
|
||||
self.delimiter_count[d] += 1
|
||||
|
||||
def find_delimiter(self):
|
||||
n = self.fields_number
|
||||
if n > 1:
|
||||
tmpData = []
|
||||
for da in self.data:
|
||||
tmp = []
|
||||
for d in (';', '|', '/', ',', '\t', ' ',):
|
||||
if da.count(d) == (n-1):
|
||||
tmp.append(d)
|
||||
if len(tmp) == 1 and tmp == tmpData:
|
||||
return tmpData[0]
|
||||
else:
|
||||
tmpData = tmp
|
||||
else:
|
||||
return None
|
||||
_, delimiter = max((n, v) for v, n in self.delimiter_count.items())
|
||||
return delimiter
|
||||
|
||||
def buildAttributes(self):
|
||||
# if there is only 1 field of data
|
||||
|
|
Loading…
Reference in New Issue