From b9d72bb043ca7f0a707a7a062e3a554fc51461b2 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Thu, 25 Jan 2018 15:44:08 +0100
Subject: [PATCH 1/6] First version of csv import module

- If more than one MISP type is recognized, an attribute is
  created for each one

- For now, the header must be set by the user as a module parameter

- Review needed on how to handle fields that are ambiguous and can
  be interpreted either as a MISP type or as an attribute field
  (for instance, comment is both a MISP type and an attribute field)
---
 misp_modules/modules/import_mod/csvimport.py | 113 +++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 misp_modules/modules/import_mod/csvimport.py

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
new file mode 100644
index 0000000..fc5b4e7
--- /dev/null
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -0,0 +1,113 @@
+import json, os
+import pymisp
+
+misperrors = {'error': 'Error'}
+mispattributes = {'input': ['file'], 'output': ['MISP attributes']}
+moduleinfo = {'version': '0.1', 'author': 'Christian Studer',
+              'description': 'Import Attributes from a csv file.',
+              'module-type': ['import']}
+moduleconfig = ['header']
+
+duplicatedFields = {'mispType': {'mispComment': 'comment'},
+                    'attrField': {'eventComment': 'comment'}}
+
+def handler(q=False):
+    if q is False:
+        return False
+    request = json.loads(q)
+    if request.get('file'):
+        filename = request['file']
+    else:
+        misperrors['error'] = "Unsupported attributes type"
+        return misperrors
+    if not request.get('config') and not request['config'].get('header'):
+        misperrors['error'] = "Configuration error"
+        return misperrors
+    config = request['config'].get('header')
+    header = []
+    data = []
+    with open(filename, 'r') as f:
+        for line in f:
+            # split comments from data
+            if line.startswith('#'):
+                header.append(line)
+            else:
+                data.append(line)
+    # find which delimiter is used
+    delimiter, length = findDelimiter(config, data)
+    # build the attributes
+    result = buildAttributes(config, data, delimiter, length)
+    r = {'results': [{'types': mispattributes['output'], 'values': result}]}
+    return r
+
+def findDelimiter(header, data):
+    n = len(header)
+    if n > 1:
+        for d in (';', ',', '|'):
+            if data[0].count(d) == (n-1):
+                return d, n
+                break
+    else:
+        return None, 1
+
+def buildAttributes(header, dataValues, delimiter, length):
+    attributes = []
+    # if there is only 1 field of data
+    if delimiter is None:
+        mispType = header[0]
+        for data in dataValues:
+            attributes.append({'type': mispType, 'value': data})
+    else:
+        # split fields that should be recognized as misp attribute types from the others
+        list2pop, misp, head = findMispTypes(header)
+        # for each line of data
+        for data in dataValues:
+            datamisp = []
+            datasplit = data.split(delimiter)
+            # in case there is an empty line or an error
+            if len(datasplit) != length:
+                continue
+            # pop from the line data that matches with a misp type, using the list of indexes
+            for l in list2pop:
+                datamisp.append(datasplit.pop(l).strip())
+            # for each misp type, we create an attribute
+            for m, dm in zip(misp, datamisp):
+                attribute = {'type': m, 'value': dm}
+                for h, ds in zip(head, datasplit):
+                    if h:
+                        attribute[h] = ds.strip()
+                attributes.append(attribute)
+    return attributes
+
+def findMispTypes(header):
+    descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
+    with open(descFilename, 'r') as f:
+        MispTypes = json.loads(f.read())['result'].get('types')
+    list2pop = []
+    misp = []
+    head = []
+    for h in reversed(header):
+        n = header.index(h)
+        # fields that are misp attribute types
+        if h in MispTypes:
+            list2pop.append(n)
+            misp.append(h)
+        # handle confusions between misp attribute types and attribute fields
+        elif h in duplicatedFields['mispType']:
+            # fields that should be considered as misp attribute types
+            list2pop.append(n)
+            misp.append(duplicatedFields['mispType'].get(h))
+        elif h in duplicatedFields['attrField']:
+            # fields that should be considered as attribute fields
+            head.append(duplicatedFields['attrField'].get(h))
+        else:
+            head.append(h)
+    # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields
+    return list2pop, misp, head
+
+def introspection():
+    return mispattributes
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo

From 4d846f968f4a1e2671b64a18a4b6b1371b8f5e41 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Fri, 26 Jan 2018 17:11:01 +0100
Subject: [PATCH 2/6] Updated delimiter parsing & data reading functions

---
 misp_modules/modules/import_mod/csvimport.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index fc5b4e7..e5ff3b0 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -24,13 +24,17 @@ def handler(q=False):
         misperrors['error'] = "Configuration error"
         return misperrors
     config = request['config'].get('header')
-    header = []
+    #header = []
     data = []
     with open(filename, 'r') as f:
         for line in f:
             # split comments from data
-            if line.startswith('#'):
-                header.append(line)
+            if '#' in line:
+                l = line.split('#')[0]
+                if l:
+                    data.append(l)
+                #else:
+                    #header.append(line)
             else:
                 data.append(line)
     # find which delimiter is used
@@ -43,10 +47,9 @@ def handler(q=False):
 def findDelimiter(header, data):
     n = len(header)
     if n > 1:
-        for d in (';', ',', '|'):
+        for d in (';', '|', '/', ','):
             if data[0].count(d) == (n-1):
                 return d, n
-                break
     else:
         return None, 1
 
@@ -100,6 +103,7 @@ def findMispTypes(header):
         elif h in duplicatedFields['attrField']:
             # fields that should be considered as attribute fields
             head.append(duplicatedFields['attrField'].get(h))
+        # otherwise, it is an attribute field
         else:
             head.append(h)
     # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields

From 56cbd72b6526bf6b777cb62cea5f4be99b390463 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Sun, 28 Jan 2018 18:12:40 +0100
Subject: [PATCH 3/6] Fixed data treatment & other updates

---
 misp_modules/modules/import_mod/csvimport.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index e5ff3b0..4076902 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -47,7 +47,7 @@ def handler(q=False):
 def findDelimiter(header, data):
     n = len(header)
     if n > 1:
-        for d in (';', '|', '/', ','):
+        for d in (';', '|', '/', ',', '    '):
             if data[0].count(d) == (n-1):
                 return d, n
     else:
@@ -59,7 +59,7 @@ def buildAttributes(header, dataValues, delimiter, length):
     if delimiter is None:
         mispType = header[0]
         for data in dataValues:
-            attributes.append({'type': mispType, 'value': data})
+            attributes.append({'type': mispType, 'value': data.strip()})
     else:
         # split fields that should be recognized as misp attribute types from the others
         list2pop, misp, head = findMispTypes(header)
@@ -107,7 +107,7 @@ def findMispTypes(header):
         else:
             head.append(h)
     # return list of indexes of the misp types, list of the misp types, remaining fields that will be attribute fields
-    return list2pop, misp, head
+    return list2pop, misp, list(reversed(head))
 
 def introspection():
     return mispattributes

From 529d22cca80820690183a10016f03f38d5047a6c Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Mon, 29 Jan 2018 09:19:58 +0100
Subject: [PATCH 4/6] fix: skipping empty lines

---
 misp_modules/modules/import_mod/csvimport.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 4076902..50e9837 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -59,7 +59,9 @@ def buildAttributes(header, dataValues, delimiter, length):
     if delimiter is None:
         mispType = header[0]
         for data in dataValues:
-            attributes.append({'type': mispType, 'value': data.strip()})
+            d = data.strip()
+            if d:
+                attributes.append({'type': mispType, 'value': d})
     else:
         # split fields that should be recognized as misp attribute types from the others
         list2pop, misp, head = findMispTypes(header)

From b2ec186ccb14a48b3e4370925bc05fb439081d06 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Mon, 29 Jan 2018 17:04:32 +0100
Subject: [PATCH 5/6] Updated delimiter finder method

---
 misp_modules/modules/import_mod/csvimport.py | 23 ++++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 50e9837..7e0107f 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -30,13 +30,11 @@ def handler(q=False):
         for line in f:
             # split comments from data
             if '#' in line:
-                l = line.split('#')[0]
-                if l:
-                    data.append(l)
-                #else:
-                    #header.append(line)
+                l = line.split('#')[0].strip()
             else:
-                data.append(line)
+                l = line.strip()
+            if l:
+                data.append(l)
     # find which delimiter is used
     delimiter, length = findDelimiter(config, data)
     # build the attributes
@@ -47,9 +45,16 @@ def handler(q=False):
 def findDelimiter(header, data):
     n = len(header)
     if n > 1:
-        for d in (';', '|', '/', ',', '    '):
-            if data[0].count(d) == (n-1):
-                return d, n
+        tmpData = []
+        for da in data:
+            tmp = []
+            for d in (';', '|', '/', ',', '\t', '    ',):
+                if da.count(d) == (n-1):
+                    tmp.append(d)
+            if len(tmp) == 1 and tmp == tmpData:
+                return tmpData[0], n
+            else:
+                tmpData = tmp
     else:
         return None, 1
 

From 71c00954d08862f58de8d218fdf4407fb8104f6d Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Tue, 30 Jan 2018 11:20:28 +0100
Subject: [PATCH 6/6] fix: Solved reading problems for some files

---
 misp_modules/modules/import_mod/csvimport.py | 22 +++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 7e0107f..dc67eec 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 import json, os
 import pymisp
 
@@ -25,8 +26,20 @@ def handler(q=False):
         return misperrors
     config = request['config'].get('header')
     #header = []
+    try:
+        data = readFile(filename, 'utf-8')
+    except:
+        data = readFile(filename, 'iso-8859-1')
+    # find which delimiter is used
+    delimiter, length = findDelimiter(config, data)
+    # build the attributes
+    result = buildAttributes(config, data, delimiter, length)
+    r = {'results': [{'types': mispattributes['output'], 'values': result}]}
+    return r
+
+def readFile(filename, encoding):
     data = []
-    with open(filename, 'r') as f:
+    with open(filename, 'r', encoding=encoding) as f:
         for line in f:
             # split comments from data
             if '#' in line:
@@ -35,12 +48,7 @@ def handler(q=False):
                 l = line.strip()
             if l:
                 data.append(l)
-    # find which delimiter is used
-    delimiter, length = findDelimiter(config, data)
-    # build the attributes
-    result = buildAttributes(config, data, delimiter, length)
-    r = {'results': [{'types': mispattributes['output'], 'values': result}]}
-    return r
+    return data
 
 def findDelimiter(header, data):
     n = len(header)