misp-modules/misp_modules/modules/import_mod/csvimport.py

# -*- coding: utf-8 -*-
import json, os
import pymisp

# Error payload returned by handler(); its 'error' entry is overwritten with
# the specific failure message before being returned.
misperrors = {
    'error': 'Error',
}

# Input/output description returned by introspection(); the 'output' list is
# also used as the 'types' of the results built by handler().
mispattributes = {
    'input': ['file'],
    'output': ['MISP attributes'],
}

# Module metadata returned by version() (which adds a 'config' entry to it).
moduleinfo = {
    'version': '0.1',
    'author': 'Christian Studer',
    'description': 'Import Attributes from a csv file.',
    'module-type': ['import'],
}

# Names of the configuration entries; handler() reads request['config']['header'].
moduleconfig = ['header']

# Header names that resolve the ambiguity between a MISP attribute type and an
# attribute field sharing the same name (see findMispTypes()):
#   'mispType'  -> header name mapped to the MISP type it stands for
#   'attrField' -> header name mapped to the attribute field it stands for
duplicatedFields = {
    'mispType': {'mispComment': 'comment'},
    'attrField': {'eventComment': 'comment'},
}

def handler(q=False):
    """Build MISP attributes from the CSV file described by a JSON request.

    Parameters:
        q: JSON string (or False). The decoded object must provide
           'file' (passed to readFile() as the file name) and
           'config' -> 'header' (the column names of the CSV data).

    Returns:
        False when q is False; the misperrors dict (with its 'error' message
        set) when the request is incomplete or the delimiter cannot be
        determined; otherwise {'results': [{'types': ..., 'values': ...}]}
        where 'values' is the list produced by buildAttributes().
    """
    if q is False:
        return False
    request = json.loads(q)

    filename = request.get('file')
    if not filename:
        misperrors['error'] = "Unsupported attributes type"
        return misperrors

    # The header is required: reject the request when either the 'config'
    # section or its 'header' entry is missing or empty.
    header = (request.get('config') or {}).get('header')
    if not header:
        misperrors['error'] = "Configuration error"
        return misperrors
    # NOTE(review): assumes the setting may arrive as one comma-separated
    # string; it is then split into column names instead of being treated
    # character by character. Confirm against the caller.
    if isinstance(header, str):
        header = [name.strip() for name in header.split(',')]

    # Retry with a permissive single-byte encoding only when the content is
    # not valid UTF-8; any other failure (e.g. missing file) is propagated.
    try:
        data = readFile(filename, 'utf-8')
    except UnicodeDecodeError:
        data = readFile(filename, 'iso-8859-1')

    # find which delimiter is used
    detected = findDelimiter(header, data)
    if detected is None:
        misperrors['error'] = "Unable to find the delimiter used in the file"
        return misperrors
    delimiter, length = detected

    # build the attributes
    result = buildAttributes(header, data, delimiter, length)
    return {'results': [{'types': mispattributes['output'], 'values': result}]}

def readFile(filename, encoding):
    """Return the meaningful lines of a text file.

    The file is opened in text mode with the given encoding. For every line,
    everything from the first '#' onwards is dropped (treated as a comment),
    surrounding whitespace is stripped, and lines left empty are skipped.

    Parameters:
        filename: path of the file to read.
        encoding: text encoding used to decode the file.

    Returns:
        List of the remaining non-empty strings, in file order.
    """
    with open(filename, 'r', encoding=encoding) as handle:
        # split('#')[0] is the whole line when it contains no '#'
        cleaned = (raw.split('#')[0].strip() for raw in handle)
        return [entry for entry in cleaned if entry]

def findDelimiter(header, data):
    """Guess which delimiter separates the fields of the data lines.

    A delimiter is a candidate for a line when it occurs exactly
    len(header) - 1 times in it.

    Parameters:
        header: sequence of column names; only its length is used.
        data: iterable of data lines (strings).

    Returns:
        (None, 1) when the header has at most one column (no delimiter
        needed); (delimiter, len(header)) when a delimiter is found;
        None when no delimiter can be determined.
    """
    n = len(header)
    if n <= 1:
        return None, 1

    candidates = (';', '|', '/', ',', '\t', '    ')
    # Number of lines on which each delimiter was a candidate; only used
    # when the consecutive-lines check below never succeeds.
    tally = {d: 0 for d in candidates}
    previous = []
    for da in data:
        current = [d for d in candidates if da.count(d) == (n - 1)]
        # Two consecutive lines agreeing on one single candidate settle it.
        if len(current) == 1 and current == previous:
            return current[0], n
        for d in current:
            tally[d] += 1
        previous = current

    # Fallback for files with a single data line, or whose matching lines are
    # never adjacent: pick the delimiter matching the most lines, provided
    # it matched at least once and is not tied with another one.
    best = max(tally.values())
    winners = [d for d in candidates if tally[d] == best]
    if best > 0 and len(winners) == 1:
        return winners[0], n
    return None

def buildAttributes(header, dataValues, delimiter, length):
    """Turn the data lines into a list of attribute dicts.

    Parameters:
        header: list of column names.
        dataValues: iterable of data lines (strings).
        delimiter: field separator, or None when each line is one value.
        length: number of fields a line must split into to be used.

    Returns:
        List of dicts, each with at least 'type' and 'value'. Without a
        delimiter, every non-blank line becomes one attribute of type
        header[0]. Otherwise one attribute is created per column recognised
        by findMispTypes() as a MISP type, and the remaining columns are
        added to each of them under their (non-empty) field name.
    """
    # a single field of data: the whole line is the value
    if delimiter is None:
        only_type = header[0]
        stripped = (row.strip() for row in dataValues)
        return [{'type': only_type, 'value': value} for value in stripped if value]

    # columns holding MISP types (by index and by name) and the other fields
    type_indexes, type_names, field_names = findMispTypes(header)
    built = []
    for row in dataValues:
        cells = row.split(delimiter)
        # ignore lines that do not have the expected number of fields
        if len(cells) != length:
            continue
        # popping shrinks `cells`, so the indexes must be consumed one by one
        # in the order given; what is left afterwards are the field values
        type_values = [cells.pop(index).strip() for index in type_indexes]
        extras = {name: cell.strip()
                  for name, cell in zip(field_names, cells) if name}
        # one attribute per MISP type found on the line
        for type_name, type_value in zip(type_names, type_values):
            attribute = {'type': type_name, 'value': type_value}
            attribute.update(extras)
            built.append(attribute)
    return built

def findMispTypes(header):
    """Separate the header columns that are MISP types from the other fields.

    The known types are read from the 'data/describeTypes.json' file located
    in the pymisp package directory (its ['result']['types'] entry).

    Parameters:
        header: list of column names.

    Returns:
        A tuple (list2pop, misp, head):
        list2pop -- positions in `header` of the columns holding MISP types,
                    from the last column to the first, so that popping them
                    in this order never shifts a position still to be popped;
        misp     -- the MISP type of each of those columns, in the same order;
        head     -- the names of the remaining columns, in header order.
    """
    descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
    with open(descFilename, 'r') as f:
        MispTypes = json.loads(f.read())['result'].get('types')
    list2pop = []
    misp = []
    head = []
    # Walk the real positions backwards. Looking a name up with
    # header.index() would return the first occurrence only, giving the
    # wrong position for a column name that appears more than once.
    for n in range(len(header) - 1, -1, -1):
        h = header[n]
        # fields that are misp attribute types
        if h in MispTypes:
            list2pop.append(n)
            misp.append(h)
        # handle confusions between misp attribute types and attribute fields
        elif h in duplicatedFields['mispType']:
            # fields that should be considered as misp attribute types
            list2pop.append(n)
            misp.append(duplicatedFields['mispType'].get(h))
        elif h in duplicatedFields['attrField']:
            # fields that should be considered as attribute fields
            head.append(duplicatedFields['attrField'].get(h))
        # otherwise, it is an attribute field
        else:
            head.append(h)
    # `head` was collected backwards: put it back in header order
    return list2pop, misp, list(reversed(head))

def introspection():
    """Return the module-level dict describing the module's input and output."""
    return mispattributes

def version():
    """Return the module information dict, with the configuration names
    stored under its 'config' key (the module-level dict is updated in place).
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`# -- coding: utf-8 --`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`import json, os`
			`import pymisp`

			`misperrors = {'error': 'Error'}`
			`mispattributes = {'input': ['file'], 'output': ['MISP attributes']}`
			`moduleinfo = {'version': '0.1', 'author': 'Christian Studer',`
			`'description': 'Import Attributes from a csv file.',`
			`'module-type': ['import']}`
			`moduleconfig = ['header']`

			`duplicatedFields = {'mispType': {'mispComment': 'comment'},`
			`'attrField': {'eventComment': 'comment'}}`

			`def handler(q=False):`
			`if q is False:`
			`return False`
			`request = json.loads(q)`
			`if request.get('file'):`
			`filename = request['file']`
			`else:`
			`misperrors['error'] = "Unsupported attributes type"`
			`return misperrors`
			`if not request.get('config') and not request['config'].get('header'):`
			`misperrors['error'] = "Configuration error"`
			`return misperrors`
			`config = request['config'].get('header')`
Updated delimiter parsing & data reading functions 2018-01-26 17:11:01 +01:00			`#header = []`
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`try:`
			`data = readFile(filename, 'utf-8')`
			`except:`
			`data = readFile(filename, 'iso-8859-1')`
			`# find which delimiter is used`
			`delimiter, length = findDelimiter(config, data)`
			`# build the attributes`
			`result = buildAttributes(config, data, delimiter, length)`
			`r = {'results': [{'types': mispattributes['output'], 'values': result}]}`
			`return r`

			`def readFile(filename, encoding):`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`data = []`
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`with open(filename, 'r', encoding=encoding) as f:`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`for line in f:`
			`# split comments from data`
Updated delimiter parsing & data reading functions 2018-01-26 17:11:01 +01:00			`if '#' in line:`
Updated delimiter finder method 2018-01-29 17:04:32 +01:00			`l = line.split('#')[0].strip()`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`else:`
Updated delimiter finder method 2018-01-29 17:04:32 +01:00			`l = line.strip()`
			`if l:`
			`data.append(l)`
fix: Solved reading problems for some files 2018-01-30 11:20:28 +01:00			`return data`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00
			`def findDelimiter(header, data):`
			`n = len(header)`
			`if n > 1:`
Updated delimiter finder method 2018-01-29 17:04:32 +01:00			`tmpData = []`
			`for da in data:`
			`tmp = []`
			`for d in (';', '\|', '/', ',', '\t', ' ',):`
			`if da.count(d) == (n-1):`
			`tmp.append(d)`
			`if len(tmp) == 1 and tmp == tmpData:`
			`return tmpData[0], n`
			`else:`
			`tmpData = tmp`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`else:`
			`return None, 1`

			`def buildAttributes(header, dataValues, delimiter, length):`
			`attributes = []`
			`# if there is only 1 field of data`
			`if delimiter is None:`
			`mispType = header[0]`
			`for data in dataValues:`
fix: skipping empty lines 2018-01-29 09:19:58 +01:00			`d = data.strip()`
			`if d:`
			`attributes.append({'type': mispType, 'value': d})`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`else:`
			`# split fields that should be recognized as misp attribute types from the others`
			`list2pop, misp, head = findMispTypes(header)`
			`# for each line of data`
			`for data in dataValues:`
			`datamisp = []`
			`datasplit = data.split(delimiter)`
			`# in case there is an empty line or an error`
			`if len(datasplit) != length:`
			`continue`
			`# pop from the line data that matches with a misp type, using the list of indexes`
			`for l in list2pop:`
			`datamisp.append(datasplit.pop(l).strip())`
			`# for each misp type, we create an attribute`
			`for m, dm in zip(misp, datamisp):`
			`attribute = {'type': m, 'value': dm}`
			`for h, ds in zip(head, datasplit):`
			`if h:`
			`attribute[h] = ds.strip()`
			`attributes.append(attribute)`
			`return attributes`

			`def findMispTypes(header):`
			`descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')`
			`with open(descFilename, 'r') as f:`
			`MispTypes = json.loads(f.read())['result'].get('types')`
			`list2pop = []`
			`misp = []`
			`head = []`
			`for h in reversed(header):`
			`n = header.index(h)`
			`# fields that are misp attribute types`
			`if h in MispTypes:`
			`list2pop.append(n)`
			`misp.append(h)`
			`# handle confusions between misp attribute types and attribute fields`
			`elif h in duplicatedFields['mispType']:`
			`# fields that should be considered as misp attribute types`
			`list2pop.append(n)`
			`misp.append(duplicatedFields['mispType'].get(h))`
			`elif h in duplicatedFields['attrField']:`
			`# fields that should be considered as attribute fields`
			`head.append(duplicatedFields['attrField'].get(h))`
Updated delimiter parsing & data reading functions 2018-01-26 17:11:01 +01:00			`# otherwise, it is an attribute field`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00			`else:`
			`head.append(h)`
			`# return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields`
Fixed data treatment & other updates 2018-01-28 18:12:40 +01:00			`return list2pop, misp, list(reversed(head))`
First version of csv import module - If more than 1 misp type is recognized, for each one an attribute is created - Needs to have header set by user as parameters of the module atm - Review needed to see the feasibility with fields that can create confusion and be interpreted both as misp type or attribute field (for instance comment is a misp type and an attribute field) 2018-01-25 15:44:08 +01:00
			`def introspection():`
			`return mispattributes`

			`def version():`
			`moduleinfo['config'] = moduleconfig`
			`return moduleinfo`