chg: [cuckooimport] Handle archives downloaded from both the WebUI and the API

pull/326/head
Pierre-Jean Grenier 2019-08-21 16:35:11 +02:00
parent 696bafa749
commit 413cc2469f
1 changed files with 42 additions and 18 deletions

View File

@ -1,9 +1,10 @@
import json import json
import base64 import base64
import tarfile import io
import logging import logging
import posixpath import posixpath
from io import BytesIO, BufferedReader import tarfile
import zipfile
from pymisp import MISPEvent, MISPObject, MISPAttribute from pymisp import MISPEvent, MISPObject, MISPAttribute
from pymisp.tools import make_binary_objects from pymisp.tools import make_binary_objects
from collections import OrderedDict from collections import OrderedDict
@ -12,10 +13,14 @@ log = logging.getLogger(__name__)
misperrors = {'error': 'Error'} misperrors = {'error': 'Error'}
moduleinfo = {'version': '1.0', moduleinfo = {
'author': 'Pierre-Jean Grenier', 'version': '1.1',
'description': 'Cuckoo archive import', 'author': 'Pierre-Jean Grenier',
'module-type': ['import']} 'description': "Import a Cuckoo archive (zipfile or bzip2 tarball), "
"either downloaded manually or exported from the "
"API (/tasks/report/{task_id}/all).",
'module-type': ['import'],
}
moduleconfig = [] moduleconfig = []
@ -202,13 +207,21 @@ class CuckooParser():
self.files = None self.files = None
self.malware_binary = None self.malware_binary = None
self.report = None self.report = None
self.config = {key: int(on) for key, on in config.items()} self.config = {
# if an option is missing (we receive None as a value),
# fall back to the default specified in the options
key: int(
on if on is not None
else self.options[key]["userConfig"]["checked"] == 'true'
)
for key, on in config.items()
}
def get_file(self, relative_filepath): def get_file(self, relative_filepath):
"""Return a BufferedReader for the corresponding relative_filepath """Return an io.BufferedIOBase for the corresponding relative_filepath
in the Cuckoo archive. If not found, return an empty BufferedReader in the Cuckoo archive. If not found, return an empty io.BufferedReader
to avoid fatal errors.""" to avoid fatal errors."""
blackhole = BufferedReader(open('/dev/null', 'rb')) blackhole = io.BufferedReader(open('/dev/null', 'rb'))
res = self.files.get(relative_filepath, blackhole) res = self.files.get(relative_filepath, blackhole)
if res == blackhole: if res == blackhole:
log.debug(f"Did not find file {relative_filepath}, " log.debug(f"Did not find file {relative_filepath}, "
@ -220,12 +233,23 @@ class CuckooParser():
# archive_encoded is base 64 encoded content # archive_encoded is base 64 encoded content
# we extract the info about each file but do not retrieve # we extract the info about each file but do not retrieve
# it automatically, as it may take too much space in memory # it automatically, as it may take too much space in memory
buf_io = BytesIO(base64.b64decode(archive_encoded)) buf_io = io.BytesIO(base64.b64decode(archive_encoded))
f = tarfile.open(fileobj=buf_io, mode='r:bz2') if zipfile.is_zipfile(buf_io):
self.files = { # the archive was probably downloaded from the WebUI
info.name: f.extractfile(info) buf_io.seek(0) # don't forget this not to read an empty buffer
for info in f.getmembers() z = zipfile.ZipFile(buf_io, 'r')
} self.files = {
info.filename: z.open(info)
for info in z.filelist
}
else:
# the archive was probably downloaded from the API
buf_io.seek(0) # don't forget this not to read an empty buffer
f = tarfile.open(fileobj=buf_io, mode='r:bz2')
self.files = {
info.name: f.extractfile(info)
for info in f.getmembers()
}
# We want to keep the order of the keys of sub-dicts in the report, # We want to keep the order of the keys of sub-dicts in the report,
# eg. the signatures have marks with unknown keys such as # eg. the signatures have marks with unknown keys such as
@ -280,7 +304,7 @@ class CuckooParser():
log.debug("Sample is a file, uploading it") log.debug("Sample is a file, uploading it")
self.read_malware() self.read_malware()
file_o, bin_type_o, bin_section_li = make_binary_objects( file_o, bin_type_o, bin_section_li = make_binary_objects(
pseudofile=BytesIO(self.malware_binary), pseudofile=io.BytesIO(self.malware_binary),
filename=target["file"]["name"], filename=target["file"]["name"],
) )
@ -548,7 +572,7 @@ class CuckooParser():
filename = posixpath.basename(path) filename = posixpath.basename(path)
dropped_file = self.get_file(path) dropped_file = self.get_file(path)
dropped_binary = BytesIO(dropped_file.read()) dropped_binary = io.BytesIO(dropped_file.read())
# create ad hoc objects # create ad hoc objects
file_o, bin_type_o, bin_section_li = make_binary_objects( file_o, bin_type_o, bin_section_li = make_binary_objects(
pseudofile=dropped_binary, filename=filename, pseudofile=dropped_binary, filename=filename,