misp-modules/misp_modules/modules/import_mod/csvimport.py

# -*- coding: utf-8 -*-
import json, os, base64
import pymisp

# Error payload that handler() fills in and returns when a request is rejected.
misperrors = {'error': 'Error'}

# Module metadata returned by version().
moduleinfo = {
    'version': '0.1',
    'author': 'Christian Studer',
    'description': 'Import Attributes from a csv file.',
    'module-type': ['import'],
}

# Exposed by version() under the 'config' key.
moduleconfig = []

# Exposed by introspection() under the 'inputSource' key.
inputSource = ['file']

# Exposed by introspection(): the header is supplied by the user for each import.
userConfig = {
    'header': {
        'type': 'String',
        'message': (
            'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\n'
            'For fields that do not match these types, please use space or simply nothing between commas.\n'
            'For instance: ip-src,domain, ,timestamp'
        ),
    },
}

# Header aliases used to disambiguate names that are both an attribute type
# and an attribute field: 'mispComment' is read as the type 'comment',
# 'attrComment' as the field 'comment'.
duplicatedFields = {
    'mispType': {'mispComment': 'comment'},
    'attrField': {'attrComment': 'comment'},
}

# Header names accepted as attribute fields.
attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']

class CsvParser():
    """Build MISP attribute dictionaries from the lines of a csv file.

    The header is a list of field names, one per column. Typical use is
    ``parse_data(lines)`` followed by ``buildAttributes()``; the resulting
    dictionaries are collected in ``self.attributes``.
    """

    # delimiters tried on each line, in this order
    _DELIMITERS = (';', '|', '/', ',', '\t', '    ')

    def __init__(self, header):
        """Store the header (list of column names) and start with no attributes."""
        self.header = header
        self.attributes = []

    def parse_data(self, data):
        """Clean the raw lines and work out which delimiter they use.

        Everything after the first '#' of a line is dropped, surrounding
        whitespace is stripped and lines left empty are skipped. The result is
        stored in ``self.data``; ``self.delimiter`` and ``self.length`` are set
        from ``findDelimiter()``.
        """
        return_data = []
        for line in data:
            cleaned = line.split('#')[0].strip()
            if cleaned:
                return_data.append(cleaned)
        self.data = return_data
        # find which delimiter is used
        self.delimiter, self.length = self.findDelimiter()

    def findDelimiter(self):
        """Return ``(delimiter, number_of_fields)`` for the parsed data.

        With a single header field there is nothing to split and
        ``(None, 1)`` is returned. Otherwise a delimiter is accepted as soon as
        two consecutive lines each have it as their only candidate, a candidate
        being a delimiter occurring exactly ``len(header) - 1`` times.

        If no pair of lines confirms a delimiter (for instance when there is
        only one data line), the first unambiguous candidate seen is used.
        When there is none at all the delimiter is ``None``, so callers always
        receive a 2-tuple.
        """
        n = len(self.header)
        if n <= 1:
            return None, 1
        previous = []
        fallback = None
        for da in self.data:
            candidates = [d for d in self._DELIMITERS if da.count(d) == (n - 1)]
            if len(candidates) == 1:
                if candidates == previous:
                    return candidates[0], n
                if fallback is None:
                    fallback = candidates[0]
            previous = candidates
        return fallback, n

    def buildAttributes(self):
        """Append one dictionary per recognised value to ``self.attributes``.

        Each dictionary holds the attribute type under 'types' and the value
        under 'values', plus any attribute field found on the same line.
        Lines whose number of fields differs from the header are skipped.
        """
        # if there is only 1 field of data
        if self.length == 1:
            mispType = self.header[0]
            for data in self.data:
                d = data.strip()
                if d:
                    self.attributes.append({'types': mispType, 'values': d})
            return
        # several fields are expected but no delimiter was found: the lines
        # cannot be split, so there is nothing to build
        if self.delimiter is None:
            return
        # split fields that should be recognized as misp attribute types from the others
        list2pop, misp, head = self.findMispTypes()
        # for each line of data
        for data in self.data:
            datasplit = data.split(self.delimiter)
            # in case there is an empty line or an error
            if len(datasplit) != self.length:
                continue
            # pop from the line data that matches with a misp type; the indexes
            # are in descending order, so earlier pops do not shift later ones
            datamisp = [datasplit.pop(index).strip() for index in list2pop]
            # for each misp type, we create an attribute
            for m, dm in zip(misp, datamisp):
                attribute = {'types': m, 'values': dm}
                for h, ds in zip(head, datasplit):
                    if h:
                        attribute[h] = ds.strip()
                self.attributes.append(attribute)

    def findMispTypes(self):
        """Load the attribute types shipped with PyMISP and classify the header.

        Returns the same triple as ``_classifyHeader``.
        """
        descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
        with open(descFilename, 'r') as f:
            MispTypes = json.loads(f.read())['result'].get('types')
        return self._classifyHeader(MispTypes)

    def _classifyHeader(self, mispTypes):
        """Split the header between attribute types and attribute fields.

        Returns ``(list2pop, misp, head)``:
        - ``list2pop``: column indexes holding attribute types, last column first
        - ``misp``: the attribute type of each of those columns, same order
        - ``head``: the remaining columns in file order, as an attribute field
          name or '' when the column is not recognised
        """
        list2pop = []
        misp = []
        head = []
        # walk the header backwards using the real position of each field;
        # looking the name up with index() would give the first occurrence
        # for every repeated field
        for n, h in reversed(list(enumerate(self.header))):
            # fields that are misp attribute types
            if h in mispTypes:
                list2pop.append(n)
                misp.append(h)
            # handle confusions between misp attribute types and attribute fields
            elif h in duplicatedFields['mispType']:
                # fields that should be considered as misp attribute types
                list2pop.append(n)
                misp.append(duplicatedFields['mispType'].get(h))
            elif h in duplicatedFields['attrField']:
                # fields that should be considered as attribute fields
                head.append(duplicatedFields['attrField'].get(h))
            # or, it could be an attribute field
            elif h in attributesFields:
                head.append(h)
            # otherwise, it is not defined
            else:
                head.append('')
        # head was filled backwards, put it back in file order
        return list2pop, misp, list(reversed(head))

def handler(q=False):
    """Parse a csv import request and return the attributes found in it.

    q is the JSON-encoded request. It must carry the base64-encoded file
    content under 'data' and the comma separated header under
    config['header'].

    Returns False when called without a request, the misperrors dictionary
    with an explanatory 'error' when the request is incomplete, and otherwise
    {'results': [...]} with the attributes built by CsvParser.
    """
    if q is False:
        return False
    request = json.loads(q)
    if request.get('data'):
        data = base64.b64decode(request['data']).decode('utf-8')
    else:
        misperrors['error'] = "Unsupported attributes type"
        return misperrors
    # the request is unusable when either the config section or its header is
    # missing or empty, so the two checks are combined with "or"
    config = request.get('config')
    if not config or not config.get('header'):
        misperrors['error'] = "Configuration error"
        return misperrors
    header = [c.strip() for c in config.get('header').split(',')]
    csv_parser = CsvParser(header)
    csv_parser.parse_data(data.split('\n'))
    # build the attributes
    csv_parser.buildAttributes()
    return {'results': csv_parser.attributes}

def introspection():
    """Return the optional settings this module defines.

    The result maps 'userConfig' and 'inputSource' to the module-level
    objects of the same name, leaving out any that is not defined.
    """
    defined = globals()
    return {
        name: defined[name]
        for name in ('userConfig', 'inputSource')
        if name in defined
    }

def version():
    """Return moduleinfo after storing moduleconfig under its 'config' key."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`# -- coding: utf-8 --`
fix: Fixed input & output of the module Also updated some functions 2018-03-02 09:03:51 +01:00			`import json, os, base64`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`import pymisp`

			`misperrors = {'error': 'Error'}`
			`moduleinfo = {'version': '0.1', 'author': 'Christian Studer',`
			`'description': 'Import Attributes from a csv file.',`
			`'module-type': ['import']}`
fix: Using userConfig to define the header instead of moduleconfig 2018-05-17 13:47:49 +02:00			`moduleconfig = []`
			`inputSource = ['file']`
			`userConfig = {'header': {`
			`'type': 'String',`
			`'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'}}`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00
			`duplicatedFields = {'mispType': {'mispComment': 'comment'},`
Clarified functions arguments using a class 2018-03-05 19:59:30 +01:00			`'attrField': {'attrComment': 'comment'}}`
fix: Avoid trying to build attributes with not intended fields - Previously: if the header field is not an attribute type, then it was added as an attribute field. PyMISP then used to skip it if needed - Now: Those fields are discarded before they are put in an attribute 2018-05-17 16:24:11 +02:00			`attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']`
Clarified functions arguments using a class 2018-03-05 19:59:30 +01:00
			`class CsvParser():`
			`def __init__(self, header):`
			`self.header = header`
			`self.attributes = []`

			`def parse_data(self, data):`
			`return_data = []`
			`for line in data:`
			`l = line.split('#')[0].strip() if '#' in line else line.strip()`
			`if l:`
			`return_data.append(l)`
			`self.data = return_data`
			`# find which delimiter is used`
			`self.delimiter, self.length = self.findDelimiter()`

			`def findDelimiter(self):`
			`n = len(self.header)`
			`if n > 1:`
			`tmpData = []`
			`for da in self.data:`
			`tmp = []`
			`for d in (';', '\|', '/', ',', '\t', ' ',):`
			`if da.count(d) == (n-1):`
			`tmp.append(d)`
			`if len(tmp) == 1 and tmp == tmpData:`
			`return tmpData[0], n`
			`else:`
			`tmpData = tmp`
			`else:`
			`return None, 1`

			`def buildAttributes(self):`
			`# if there is only 1 field of data`
			`if self.delimiter is None:`
			`mispType = self.header[0]`
			`for data in self.data:`
			`d = data.strip()`
			`if d:`
			`self.attributes.append({'types': mispType, 'values': d})`
			`else:`
			`# split fields that should be recognized as misp attribute types from the others`
			`list2pop, misp, head = self.findMispTypes()`
			`# for each line of data`
			`for data in self.data:`
			`datamisp = []`
			`datasplit = data.split(self.delimiter)`
			`# in case there is an empty line or an error`
			`if len(datasplit) != self.length:`
			`continue`
			`# pop from the line data that matches with a misp type, using the list of indexes`
			`for l in list2pop:`
			`datamisp.append(datasplit.pop(l).strip())`
			`# for each misp type, we create an attribute`
			`for m, dm in zip(misp, datamisp):`
			`attribute = {'types': m, 'values': dm}`
			`for h, ds in zip(head, datasplit):`
			`if h:`
			`attribute[h] = ds.strip()`
			`self.attributes.append(attribute)`

			`def findMispTypes(self):`
			`descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')`
			`with open(descFilename, 'r') as f:`
			`MispTypes = json.loads(f.read())['result'].get('types')`
			`list2pop = []`
			`misp = []`
			`head = []`
			`for h in reversed(self.header):`
			`n = self.header.index(h)`
			`# fields that are misp attribute types`
			`if h in MispTypes:`
			`list2pop.append(n)`
			`misp.append(h)`
			`# handle confusions between misp attribute types and attribute fields`
			`elif h in duplicatedFields['mispType']:`
			`# fields that should be considered as misp attribute types`
			`list2pop.append(n)`
			`misp.append(duplicatedFields['mispType'].get(h))`
			`elif h in duplicatedFields['attrField']:`
			`# fields that should be considered as attribute fields`
			`head.append(duplicatedFields['attrField'].get(h))`
fix: Avoid trying to build attributes with not intended fields - Previously: if the header field is not an attribute type, then it was added as an attribute field. PyMISP then used to skip it if needed - Now: Those fields are discarded before they are put in an attribute 2018-05-17 16:24:11 +02:00			`# or, it could be an attribute field`
			`elif h in attributesFields:`
Clarified functions arguments using a class 2018-03-05 19:59:30 +01:00			`head.append(h)`
fix: Avoid trying to build attributes with not intended fields - Previously: if the header field is not an attribute type, then it was added as an attribute field. PyMISP then used to skip it if needed - Now: Those fields are discarded before they are put in an attribute 2018-05-17 16:24:11 +02:00			`# otherwise, it is not defined`
			`else:`
			`head.append('')`
Clarified functions arguments using a class 2018-03-05 19:59:30 +01:00			`# return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields`
			`return list2pop, misp, list(reversed(head))`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00
			`def handler(q=False):`
			`if q is False:`
			`return False`
			`request = json.loads(q)`
fix: Fixed input & output of the module Also updated some functions 2018-03-02 09:03:51 +01:00			`if request.get('data'):`
			`data = base64.b64decode(request['data']).decode('utf-8')`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`else:`
			`misperrors['error'] = "Unsupported attributes type"`
			`return misperrors`
			`if not request.get('config') and not request['config'].get('header'):`
			`misperrors['error'] = "Configuration error"`
			`return misperrors`
fix: Fixed input & output of the module Also updated some functions 2018-03-02 09:03:51 +01:00			`config = request['config'].get('header').split(',')`
			`config = [c.strip() for c in config]`
Clarified functions arguments using a class 2018-03-05 19:59:30 +01:00			`csv_parser = CsvParser(config)`
			`csv_parser.parse_data(data.split('\n'))`
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`# build the attributes`
Clarified functions arguments using a class 2018-03-05 19:59:30 +01:00			`csv_parser.buildAttributes()`
			`r = {'results': csv_parser.attributes}`
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`return r`

First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`def introspection():`
fix: Using userConfig to define the header instead of moduleconfig 2018-05-17 13:47:49 +02:00			`modulesetup = {}`
			`try:`
			`userConfig`
			`modulesetup['userConfig'] = userConfig`
			`except NameError:`
			`pass`
			`try:`
			`inputSource`
			`modulesetup['inputSource'] = inputSource`
			`except NameError:`
			`pass`
			`return modulesetup`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00
			`def version():`
			`moduleinfo['config'] = moduleconfig`
			`return moduleinfo`