mirror of https://github.com/MISP/misp-modules
Updated delimiter finder function
parent
1fb72f3c7a
commit
2b509a2fd3
|
@ -19,6 +19,7 @@ userConfig = {'header': {
|
||||||
duplicatedFields = {'mispType': {'mispComment': 'comment'},
|
duplicatedFields = {'mispType': {'mispComment': 'comment'},
|
||||||
'attrField': {'attrComment': 'comment'}}
|
'attrField': {'attrComment': 'comment'}}
|
||||||
attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']
|
attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']
|
||||||
|
delimiters = [',', ';', '|', '/', '\t', ' ']
|
||||||
|
|
||||||
class CsvParser():
|
class CsvParser():
|
||||||
def __init__(self, header, has_header):
|
def __init__(self, header, has_header):
|
||||||
|
@ -29,29 +30,31 @@ class CsvParser():
|
||||||
|
|
||||||
def parse_data(self, data):
|
def parse_data(self, data):
|
||||||
return_data = []
|
return_data = []
|
||||||
for line in data:
|
if self.fields_number == 1:
|
||||||
l = line.split('#')[0].strip() if '#' in line else line.strip()
|
for line in data:
|
||||||
if l:
|
l = line.split('#')[0].strip()
|
||||||
return_data.append(l)
|
if l:
|
||||||
|
return_data.append(l)
|
||||||
|
self.delimiter = None
|
||||||
|
else:
|
||||||
|
self.delimiter_count = dict([(d, 0) for d in delimiters])
|
||||||
|
for line in data:
|
||||||
|
l = line.split('#')[0].strip()
|
||||||
|
if l:
|
||||||
|
self.parse_delimiter(l)
|
||||||
|
return_data.append(l)
|
||||||
|
# find which delimiter is used
|
||||||
|
self.delimiter = self.find_delimiter()
|
||||||
self.data = return_data[1:] if self.has_header else return_data
|
self.data = return_data[1:] if self.has_header else return_data
|
||||||
# find which delimiter is used
|
|
||||||
self.delimiter = self.find_delimiter()
|
def parse_delimiter(self, line):
|
||||||
|
for d in delimiters:
|
||||||
|
if line.count(d) >= (self.fields_number - 1):
|
||||||
|
self.delimiter_count[d] += 1
|
||||||
|
|
||||||
def find_delimiter(self):
|
def find_delimiter(self):
|
||||||
n = self.fields_number
|
_, delimiter = max((n, v) for v, n in self.delimiter_count.items())
|
||||||
if n > 1:
|
return delimiter
|
||||||
tmpData = []
|
|
||||||
for da in self.data:
|
|
||||||
tmp = []
|
|
||||||
for d in (';', '|', '/', ',', '\t', ' ',):
|
|
||||||
if da.count(d) == (n-1):
|
|
||||||
tmp.append(d)
|
|
||||||
if len(tmp) == 1 and tmp == tmpData:
|
|
||||||
return tmpData[0]
|
|
||||||
else:
|
|
||||||
tmpData = tmp
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def buildAttributes(self):
|
def buildAttributes(self):
|
||||||
# if there is only 1 field of data
|
# if there is only 1 field of data
|
||||||
|
|
Loading…
Reference in New Issue