diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py index 5cfbc67..543d67b 100644 --- a/misp_modules/modules/import_mod/csvimport.py +++ b/misp_modules/modules/import_mod/csvimport.py @@ -10,7 +10,94 @@ moduleinfo = {'version': '0.1', 'author': 'Christian Studer', moduleconfig = ['header'] duplicatedFields = {'mispType': {'mispComment': 'comment'}, - 'attrField': {'eventComment': 'comment'}} + 'attrField': {'attrComment': 'comment'}} + +class CsvParser(): + def __init__(self, header): + self.header = header + self.attributes = [] + + def parse_data(self, data): + return_data = [] + for line in data: + l = line.split('#')[0].strip() if '#' in line else line.strip() + if l: + return_data.append(l) + self.data = return_data + # find which delimiter is used + self.delimiter, self.length = self.findDelimiter() + + def findDelimiter(self): + n = len(self.header) + if n > 1: + tmpData = [] + for da in self.data: + tmp = [] + for d in (';', '|', '/', ',', '\t', ' ',): + if da.count(d) == (n-1): + tmp.append(d) + if len(tmp) == 1 and tmp == tmpData: + return tmpData[0], n + else: + tmpData = tmp + else: + return None, 1 + + def buildAttributes(self): + # if there is only 1 field of data + if self.delimiter is None: + mispType = self.header[0] + for data in self.data: + d = data.strip() + if d: + self.attributes.append({'types': mispType, 'values': d}) + else: + # split fields that should be recognized as misp attribute types from the others + list2pop, misp, head = self.findMispTypes() + # for each line of data + for data in self.data: + datamisp = [] + datasplit = data.split(self.delimiter) + # in case there is an empty line or an error + if len(datasplit) != self.length: + continue + # pop from the line data that matches with a misp type, using the list of indexes + for l in list2pop: + datamisp.append(datasplit.pop(l).strip()) + # for each misp type, we create an attribute + for m, dm in zip(misp, datamisp): + attribute = {'types': m, 'values': dm} + for h, ds in zip(head, datasplit): + if h: + attribute[h] = ds.strip() + self.attributes.append(attribute) + + def findMispTypes(self): + descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json') + with open(descFilename, 'r') as f: + MispTypes = json.loads(f.read())['result'].get('types') + list2pop = [] + misp = [] + head = [] + for h in reversed(self.header): + n = self.header.index(h) + # fields that are misp attribute types + if h in MispTypes: + list2pop.append(n) + misp.append(h) + # handle confusions between misp attribute types and attribute fields + elif h in duplicatedFields['mispType']: + # fields that should be considered as misp attribute types + list2pop.append(n) + misp.append(duplicatedFields['mispType'].get(h)) + elif h in duplicatedFields['attrField']: + # fields that should be considered as attribute fields + head.append(duplicatedFields['attrField'].get(h)) + # otherwise, it is an attribute field + else: + head.append(h) + # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields + return list2pop, misp, list(reversed(head)) def handler(q=False): if q is False: @@ -26,96 +113,13 @@ def handler(q=False): return misperrors config = request['config'].get('header').split(',') config = [c.strip() for c in config] - data = parse_data(data.split('\n')) - # find which delimiter is used - delimiter, length = findDelimiter(config, data) + csv_parser = CsvParser(config) + csv_parser.parse_data(data.split('\n')) # build the attributes - result = buildAttributes(config, data, delimiter, length) - r = {'results': result} + csv_parser.buildAttributes() + r = {'results': csv_parser.attributes} return r -def parse_data(data): - return_data = [] - for line in data: - l = line.split('#')[0].strip() if '#' in line else line.strip() - if l: - return_data.append(l) - return return_data - -def findDelimiter(header, data): - n = len(header) - if n > 1: - tmpData = [] - for da in data: - tmp = [] - for d in (';', '|', '/', ',', '\t', ' ',): - if da.count(d) == (n-1): - tmp.append(d) - if len(tmp) == 1 and tmp == tmpData: - return tmpData[0], n - else: - tmpData = tmp - else: - return None, 1 - -def buildAttributes(header, dataValues, delimiter, length): - attributes = [] - # if there is only 1 field of data - if delimiter is None: - mispType = header[0] - for data in dataValues: - d = data.strip() - if d: - attributes.append({'types': mispType, 'values': d}) - else: - # split fields that should be recognized as misp attribute types from the others - list2pop, misp, head = findMispTypes(header) - # for each line of data - for data in dataValues: - datamisp = [] - datasplit = data.split(delimiter) - # in case there is an empty line or an error - if len(datasplit) != length: - continue - # pop from the line data that matches with a misp type, using the list of indexes - for l in list2pop: - datamisp.append(datasplit.pop(l).strip()) - # for each misp type, we create an attribute - for m, dm in zip(misp, datamisp): - attribute = {'types': m, 'values': dm} - for h, ds in zip(head, datasplit): - if h: - attribute[h] = ds.strip() - attributes.append(attribute) - return attributes - -def findMispTypes(header): - descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json') - with open(descFilename, 'r') as f: - MispTypes = json.loads(f.read())['result'].get('types') - list2pop = [] - misp = [] - head = [] - for h in reversed(header): - n = header.index(h) - # fields that are misp attribute types - if h in MispTypes: - list2pop.append(n) - misp.append(h) - # handle confusions between misp attribute types and attribute fields - elif h in duplicatedFields['mispType']: - # fields that should be considered as misp attribute types - list2pop.append(n) - misp.append(duplicatedFields['mispType'].get(h)) - elif h in duplicatedFields['attrField']: - # fields that should be considered as attribute fields - head.append(duplicatedFields['attrField'].get(h)) - # otherwise, it is an attribute field - else: - head.append(h) - # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields - return list2pop, misp, list(reversed(head)) - def introspection(): return mispattributes