From 63ba7580d3d45cdc93495ce04286f906ed294cd8 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Fri, 27 Jul 2018 23:13:47 +0200
Subject: [PATCH 1/4] chg: Updated csvimport to support files from csv export +
 import MISP objects

---
 misp_modules/modules/import_mod/csvimport.py | 105 ++++++++++++++++---
 1 file changed, 88 insertions(+), 17 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 5ccf287..9b19fc2 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 import json, os, base64
-import pymisp
+from pymisp import __path__ as pymisp_path
+from collections import defaultdict
 
 misperrors = {'error': 'Error'}
 moduleinfo = {'version': '0.1', 'author': 'Christian Studer',
@@ -13,20 +14,49 @@ userConfig = {'header': {
                 'message': 'Define the header of the csv file, with types (included in MISP attribute types or attribute fields) separated by commas.\nFor fields that do not match these types, please use space or simply nothing between commas.\nFor instance: ip-src,domain, ,timestamp'},
               'has_header':{
                 'type': 'Boolean',
-                'message': 'Tick this box ONLY if there is a header line, NOT COMMENTED, in the file (which will be skipped atm).'
+                'message': 'Tick this box ONLY if there is a header line, NOT COMMENTED, in the file.'
               }}
 
 duplicatedFields = {'mispType': {'mispComment': 'comment'},
                     'attrField': {'attrComment': 'comment'}}
 attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']
+misp_standard_csv_header = ['uuid','event_id','category','type','value','comment','to_ids','date',
+                            'object_relation','object_uuid','object_name','object_meta_category']
 delimiters = [',', ';', '|', '/', '\t', '    ']
 
 class CsvParser():
-    def __init__(self, header, has_header):
-        self.header = header
-        self.fields_number = len(header)
-        self.has_header = has_header
-        self.attributes = []
+    def __init__(self, header, has_header, data):
+        if data[0].split(',') == misp_standard_csv_header:
+            self.header = misp_standard_csv_header
+            self.from_misp = True
+            self.data = data[1:]
+        else:
+            self.from_misp = False
+            self.has_header = has_header
+            if header:
+                self.header = header
+                self.fields_number = len(header)
+                self.parse_data(data)
+            else:
+                self.has_delimiter = True
+                self.fields_number, self.delimiter, self.header = self.get_delimiter_from_header(data[0])
+                self.data = data
+            self.result = []
+
+    def get_delimiter_from_header(self, data):
+        delimiters_count = {}
+        for d in delimiters:
+            length = data.count(d)
+            if length > 0:
+                delimiters_count[d] = data.count(d)
+        if len(delimiters_count) == 0:
+            length = 0
+            delimiter = None
+            header = [data]
+        else:
+            length, delimiter = max((n, v) for v, n in delimiters_count.items())
+            header = data.split(delimiter)
+        return length + 1, delimiter, header
 
     def parse_data(self, data):
         return_data = []
@@ -45,6 +75,7 @@ class CsvParser():
                     return_data.append(l)
             # find which delimiter is used
             self.delimiter = self.find_delimiter()
+            if self.fields_number == 0: self.header = return_data[0].split(self.delimiter)
         self.data = return_data[1:] if self.has_header else return_data
 
     def parse_delimiter(self, line):
@@ -56,6 +87,43 @@ class CsvParser():
         _, delimiter = max((n, v) for v, n in self.delimiter_count.items())
         return delimiter
 
+    def parse_csv(self):
+        if self.from_misp:
+            self.build_misp_event()
+        else:
+            self.buildAttributes()
+
+    def build_misp_event(self):
+        l_attributes = []
+        l_objects = []
+        objects = defaultdict(list)
+        attribute_fields = self.header[:1] + self.header[2:8]
+        relation_type = self.header[8]
+        object_fields = self.header[9:]
+        for line in self.data:
+            attribute = {}
+            try:
+                a_uuid,_,category,a_type,value,comment,to_ids,date,relation,o_uuid,o_name,o_meta_category = line.split(',')
+            except ValueError:
+                continue
+            for t, v in zip(attribute_fields, [a_uuid,category,a_type,value,comment,to_ids,date]):
+                attribute[t] = v.replace('"', '')
+            attribute['to_ids'] = True if to_ids == '1' else False
+            relation = relation.replace('"', '')
+            if relation:
+                attribute[relation_type] = relation
+                object_index = tuple(o.replace('"', '') for o in (o_uuid,o_name,o_meta_category))
+                objects[object_index].append(attribute)
+            else:
+                l_attributes.append(attribute)
+        for keys, attributes in objects.items():
+            misp_object = {}
+            for t, v in zip(['uuid','name','meta-category'], keys):
+                misp_object[t] = v
+            misp_object['Attribute'] = attributes
+            l_objects.append(misp_object)
+        self.result = {"Attribute": l_attributes, "Object": l_objects}
+
     def buildAttributes(self):
         # if there is only 1 field of data
         if self.delimiter is None:
@@ -63,7 +131,7 @@ class CsvParser():
             for data in self.data:
                 d = data.strip()
                 if d:
-                    self.attributes.append({'types': mispType, 'values': d})
+                    self.result.append({'types': mispType, 'values': d})
         else:
             # split fields that should be recognized as misp attribute types from the others
             list2pop, misp, head = self.findMispTypes()
@@ -83,10 +151,10 @@ class CsvParser():
                     for h, ds in zip(head, datasplit):
                         if h:
                             attribute[h] = ds.strip()
-                    self.attributes.append(attribute)
+                    self.result.append(attribute)
 
     def findMispTypes(self):
-        descFilename = os.path.join(pymisp.__path__[0], 'data/describeTypes.json')
+        descFilename = os.path.join(pymisp_path[0], 'data/describeTypes.json')
         with open(descFilename, 'r') as f:
             MispTypes = json.loads(f.read())['result'].get('types')
         list2pop = []
@@ -124,18 +192,21 @@ def handler(q=False):
     else:
         misperrors['error'] = "Unsupported attributes type"
         return misperrors
-    if not request.get('config') and not request['config'].get('header'):
-        misperrors['error'] = "Configuration error"
-        return misperrors
-    header = request['config'].get('header').split(',')
-    header = [c.strip() for c in header]
     has_header = request['config'].get('has_header')
     has_header = True if has_header == '1' else False
+    if not request.get('config') and not request['config'].get('header'):
+        if has_header:
+            header = []
+        else:
+            misperrors['error'] = "Configuration error"
+            return misperrors
+    else:
+        header = request['config'].get('header').split(',')
+        header = [c.strip() for c in header]
     csv_parser = CsvParser(header, has_header)
-    csv_parser.parse_data(data.split('\n'))
     # build the attributes
     csv_parser.buildAttributes()
-    r = {'results': csv_parser.attributes}
+    r = {'results': csv_parser.result}
     return r
 
 def introspection():

From 92fbcaeff60d82168351e4dbb49133ba26226308 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Sat, 28 Jul 2018 00:07:02 +0200
Subject: [PATCH 2/4] fix: Fixed omitted changes in handler function

---
 misp_modules/modules/import_mod/csvimport.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 9b19fc2..d7be52a 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -203,9 +203,9 @@ def handler(q=False):
     else:
         header = request['config'].get('header').split(',')
         header = [c.strip() for c in header]
-    csv_parser = CsvParser(header, has_header)
+    csv_parser = CsvParser(header, has_header, data.split('\n'))
     # build the attributes
-    csv_parser.buildAttributes()
+    csv_parser.parse_csv()
     r = {'results': csv_parser.result}
     return r
 

From 7980aa045abaf4053bf2ad754eac7038c46edfd0 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Wed, 1 Aug 2018 17:59:00 +0200
Subject: [PATCH 3/4] fix: Handling the case of Context included in the csv
 file exported from MISP

---
 misp_modules/modules/import_mod/csvimport.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index d7be52a..90505b2 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -22,11 +22,14 @@ duplicatedFields = {'mispType': {'mispComment': 'comment'},
 attributesFields = ['type', 'value', 'category', 'to_ids', 'comment', 'distribution']
 misp_standard_csv_header = ['uuid','event_id','category','type','value','comment','to_ids','date',
                             'object_relation','object_uuid','object_name','object_meta_category']
+misp_context_additional_fields = ['event_info','event_member_org','event_source_org','event_distribution',
+                                  'event_threat_level_id','event_analysis','event_date','event_tag']
 delimiters = [',', ';', '|', '/', '\t', '    ']
 
 class CsvParser():
     def __init__(self, header, has_header, data):
-        if data[0].split(',') == misp_standard_csv_header:
+        data_header = data[0].split(',')
+        if data_header == misp_standard_csv_header or data_header == (misp_standard_csv_header + misp_context_additional_fields):
             self.header = misp_standard_csv_header
             self.from_misp = True
             self.data = data[1:]
@@ -100,10 +103,11 @@ class CsvParser():
         attribute_fields = self.header[:1] + self.header[2:8]
         relation_type = self.header[8]
         object_fields = self.header[9:]
+        header_length = len(self.header)
         for line in self.data:
             attribute = {}
             try:
-                a_uuid,_,category,a_type,value,comment,to_ids,date,relation,o_uuid,o_name,o_meta_category = line.split(',')
+                a_uuid,_,category,a_type,value,comment,to_ids,date,relation,o_uuid,o_name,o_meta_category = line.split(',')[:header_length]
             except ValueError:
                 continue
             for t, v in zip(attribute_fields, [a_uuid,category,a_type,value,comment,to_ids,date]):

From 8b4d24ba635d424c38936dc37223ffe8c1adb779 Mon Sep 17 00:00:00 2001
From: chrisr3d <chris.studer.68@gmail.com>
Date: Thu, 2 Aug 2018 15:42:59 +0200
Subject: [PATCH 4/4] fix: Fixed fields parsing to support files from csv
 export with additional context

---
 misp_modules/modules/import_mod/csvimport.py | 29 +++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/misp_modules/modules/import_mod/csvimport.py b/misp_modules/modules/import_mod/csvimport.py
index 90505b2..5b083a9 100644
--- a/misp_modules/modules/import_mod/csvimport.py
+++ b/misp_modules/modules/import_mod/csvimport.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-import json, os, base64
+import base64, csv, io, json, os
 from pymisp import __path__ as pymisp_path
 from collections import defaultdict
 
@@ -24,13 +24,14 @@ misp_standard_csv_header = ['uuid','event_id','category','type','value','comment
                             'object_relation','object_uuid','object_name','object_meta_category']
 misp_context_additional_fields = ['event_info','event_member_org','event_source_org','event_distribution',
                                   'event_threat_level_id','event_analysis','event_date','event_tag']
+misp_extended_csv_header = misp_standard_csv_header[:9] + ['attribute_tag'] + misp_standard_csv_header[9:] + misp_context_additional_fields
 delimiters = [',', ';', '|', '/', '\t', '    ']
 
 class CsvParser():
     def __init__(self, header, has_header, data):
-        data_header = data[0].split(',')
-        if data_header == misp_standard_csv_header or data_header == (misp_standard_csv_header + misp_context_additional_fields):
-            self.header = misp_standard_csv_header
+        data_header = data[0]
+        if data_header == misp_standard_csv_header or data_header == misp_extended_csv_header:
+            self.header = misp_standard_csv_header if data_header == misp_standard_csv_header else misp_extended_csv_header[:13]
             self.from_misp = True
             self.data = data[1:]
         else:
@@ -100,23 +101,24 @@ class CsvParser():
         l_attributes = []
         l_objects = []
         objects = defaultdict(list)
-        attribute_fields = self.header[:1] + self.header[2:8]
-        relation_type = self.header[8]
-        object_fields = self.header[9:]
         header_length = len(self.header)
+        attribute_fields = self.header[:1] + self.header[2:6]
         for line in self.data:
             attribute = {}
             try:
-                a_uuid,_,category,a_type,value,comment,to_ids,date,relation,o_uuid,o_name,o_meta_category = line.split(',')[:header_length]
+                try:
+                    a_uuid,_,a_category,a_type,value,comment,to_ids,_,relation,o_uuid,o_name,o_category = line[:header_length]
+                except ValueError:
+                    a_uuid,_,a_category,a_type,value,comment,to_ids,_,relation,tag,o_uuid,o_name,o_category = line[:header_length]
+                    if tag: attribute['tags'] = tag
             except ValueError:
                 continue
-            for t, v in zip(attribute_fields, [a_uuid,category,a_type,value,comment,to_ids,date]):
+            for t, v in zip(attribute_fields, [a_uuid, a_category, a_type, value, comment]):
                 attribute[t] = v.replace('"', '')
             attribute['to_ids'] = True if to_ids == '1' else False
-            relation = relation.replace('"', '')
             if relation:
-                attribute[relation_type] = relation
-                object_index = tuple(o.replace('"', '') for o in (o_uuid,o_name,o_meta_category))
+                attribute["object_relation"] = relation.replace('"', '')
+                object_index = tuple(o.replace('"', '') for o in (o_uuid,o_name,o_category))
                 objects[object_index].append(attribute)
             else:
                 l_attributes.append(attribute)
@@ -193,6 +195,7 @@ def handler(q=False):
     request = json.loads(q)
     if request.get('data'):
         data = base64.b64decode(request['data']).decode('utf-8')
+        data = [line for line in csv.reader(io.TextIOWrapper(io.BytesIO(data.encode()), encoding='utf-8'))]
     else:
         misperrors['error'] = "Unsupported attributes type"
         return misperrors
@@ -207,7 +210,7 @@ def handler(q=False):
     else:
         header = request['config'].get('header').split(',')
         header = [c.strip() for c in header]
-    csv_parser = CsvParser(header, has_header, data.split('\n'))
+    csv_parser = CsvParser(header, has_header, data)
     # build the attributes
     csv_parser.parse_csv()
     r = {'results': csv_parser.result}