chg: [cuckooimport] Handle archives downloaded from both the WebUI and the API

2019-08-21 16:35:11 +02:00 · 2019-08-21 16:35:11 +02:00 · 413cc2469f
parent 696bafa749
commit 413cc2469f
1 changed file with 42 additions and 18 deletions
--- a/misp_modules/modules/import_mod/cuckooimport.py
+++ b/misp_modules/modules/import_mod/cuckooimport.py
@@ -1,9 +1,10 @@
 import json
 import base64
-import tarfile
+import io
 import logging
 import posixpath
-from io import BytesIO, BufferedReader
+import tarfile
+import zipfile
 from pymisp import MISPEvent, MISPObject, MISPAttribute
 from pymisp.tools import make_binary_objects
 from collections import OrderedDict
@@ -12,10 +13,14 @@ log = logging.getLogger(__name__)

 misperrors = {'error': 'Error'}

-moduleinfo = {'version': '1.0',
-              'author': 'Pierre-Jean Grenier',
-              'description': 'Cuckoo archive import',
-              'module-type': ['import']}
+moduleinfo = {
+    'version': '1.1',
+    'author': 'Pierre-Jean Grenier',
+    'description': "Import a Cuckoo archive (zipfile or bzip2 tarball), "
+                   "either downloaded manually or exported from the "
+                   "API (/tasks/report/{task_id}/all).",
+    'module-type': ['import'],
+}

 moduleconfig = []

@@ -202,13 +207,21 @@ class CuckooParser():
        self.files = None
        self.malware_binary = None
        self.report = None
-        self.config = {key: int(on) for key, on in config.items()}
+        self.config = {
+            # if an option is missing (we receive None as a value),
+            # fall back to the default specified in the options
+            key: int(
+                on if on is not None
+                else self.options[key]["userConfig"]["checked"] == 'true'
+            )
+            for key, on in config.items()
+        }

    def get_file(self, relative_filepath):
-        """Return a BufferedReader for the corresponding relative_filepath
-        in the Cuckoo archive. If not found, return an empty BufferedReader
+        """Return an io.BufferedIOBase for the corresponding relative_filepath
+        in the Cuckoo archive. If not found, return an empty io.BufferedReader
        to avoid fatal errors."""
-        blackhole = BufferedReader(open('/dev/null', 'rb'))
+        blackhole = io.BufferedReader(open('/dev/null', 'rb'))
        res = self.files.get(relative_filepath, blackhole)
        if res == blackhole:
            log.debug(f"Did not find file {relative_filepath}, "
@@ -220,12 +233,23 @@ class CuckooParser():
        # archive_encoded is base 64 encoded content
        # we extract the info about each file but do not retrieve
        # it automatically, as it may take too much space in memory
-        buf_io = BytesIO(base64.b64decode(archive_encoded))
-        f = tarfile.open(fileobj=buf_io, mode='r:bz2')
-        self.files = {
-            info.name: f.extractfile(info)
-            for info in f.getmembers()
-        }
+        buf_io = io.BytesIO(base64.b64decode(archive_encoded))
+        if zipfile.is_zipfile(buf_io):
+            # the archive was probably downloaded from the WebUI
+            buf_io.seek(0)  # rewind: is_zipfile() may have moved the read position
+            z = zipfile.ZipFile(buf_io, 'r')
+            self.files = {
+                info.filename: z.open(info)
+                for info in z.filelist
+            }
+        else:
+            # the archive was probably downloaded from the API
+            buf_io.seek(0)  # rewind so tarfile reads the archive from its start
+            f = tarfile.open(fileobj=buf_io, mode='r:bz2')
+            self.files = {
+                info.name: f.extractfile(info)
+                for info in f.getmembers()
+            }

        # We want to keep the order of the keys of sub-dicts in the report,
        # eg. the signatures have marks with unknown keys such as
@@ -280,7 +304,7 @@ class CuckooParser():
            log.debug("Sample is a file, uploading it")
            self.read_malware()
            file_o, bin_type_o, bin_section_li = make_binary_objects(
-                pseudofile=BytesIO(self.malware_binary),
+                pseudofile=io.BytesIO(self.malware_binary),
                filename=target["file"]["name"],
            )

@@ -548,7 +572,7 @@ class CuckooParser():
            filename = posixpath.basename(path)

        dropped_file = self.get_file(path)
-        dropped_binary = BytesIO(dropped_file.read())
+        dropped_binary = io.BytesIO(dropped_file.read())
        # create ad hoc objects
        file_o, bin_type_o, bin_section_li = make_binary_objects(
            pseudofile=dropped_binary, filename=filename,