* add parser for report version v1 and v2

* add summary JSON import module
pull/452/head
Jens Thom 2020-11-30 12:06:19 +01:00
parent 095fbfd75f
commit 2a870f2d97
9 changed files with 1539 additions and 504 deletions

View File

@ -26,6 +26,8 @@ click==7.1.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2,
colorama==0.4.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
configparser==5.0.1; python_version >= '3.6'
cryptography==3.1.1
clamd==1.0.2
dataclasses; python_version < '3.7'
decorator==4.4.2
deprecated==1.2.10; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
dnspython==2.0.0
@ -46,6 +48,7 @@ jsonschema==3.2.0
lief==0.10.1
lxml==4.5.2
maclookup==1.0.3
markdownify==0.5.3
maxminddb==2.0.2; python_version >= '3.6'
multidict==4.7.6; python_version >= '3.5'
np==1.0.2

File diff suppressed because it is too large Load Diff

View File

@ -1,148 +0,0 @@
#!/usr/bin/env python3
"""Python client library for VMRay REST API"""
import base64
import datetime
import os.path
import requests
import urllib.parse
# Silence the certificate warnings urllib3 emits for unverified HTTPS requests.
# Prefer the urllib3 copy exposed through requests; if that attribute is
# missing, fall back to a standalone urllib3 install, and give up quietly when
# neither is available.
# pylint: disable=no-member
try:
    requests.packages.urllib3.disable_warnings()
except AttributeError:
    try:
        import urllib3

        try:
            urllib3.disable_warnings()
        except AttributeError:
            pass
    except ImportError:
        pass
# pylint: disable=
class VMRayRESTAPIError(Exception):
    """Error raised when the VMRay REST API answers with a failure.

    The optional ``status_code`` keyword argument is removed from the keyword
    arguments and kept on the instance (``None`` when it was not supplied);
    everything else is forwarded to ``Exception``.
    """

    def __init__(self, *args, **kwargs):
        # Strip our own keyword first: Exception itself accepts no keywords.
        http_status = kwargs.pop("status_code", None)
        self.status_code = http_status
        super().__init__(*args, **kwargs)
def handle_rest_api_result(result):
    """Check an API response and raise if it is not a 2xx success.

    Args:
        result: Response object exposing ``status_code``, ``text`` and
            ``json()`` (as returned by the ``requests`` functions used in
            this module).

    Returns:
        None when the status code is in the 200-299 range.

    Raises:
        VMRayRESTAPIError: For any other status code. The message is the
            ``error_msg`` field of a JSON object body when present; otherwise
            it contains the status code and the raw response text.
    """
    if 200 <= result.status_code <= 299:
        return

    raw_message = "API returned error %u: %s" % (result.status_code, result.text)
    try:
        json_result = result.json()
    except ValueError as exc:
        raise VMRayRESTAPIError(raw_message, status_code=result.status_code) from exc

    # A valid JSON body is not necessarily an object: a list, string or null
    # has no .get(), so report the raw body instead of failing with
    # AttributeError.
    if not isinstance(json_result, dict):
        raise VMRayRESTAPIError(raw_message, status_code=result.status_code)

    raise VMRayRESTAPIError(json_result.get("error_msg", "Unknown error"), status_code=result.status_code)
class VMRayRESTAPI(object):
    """Small client for the VMRay REST API built on ``requests``.

    Attributes:
        server: Base URL of the server; ``https://`` is prepended when the
            value passed to the constructor has no scheme.
        api_key: API key sent in the ``Authorization`` header.
        verify_cert: Passed to ``requests`` as ``verify``.
    """

    def __init__(self, server, api_key, verify_cert=True):
        """Store the connection settings, defaulting the scheme to HTTPS."""
        # assume HTTPS if no scheme is specified
        if urllib.parse.urlsplit(server).scheme == "":
            server = "https://" + server

        self.server = server
        self.api_key = api_key
        self.verify_cert = verify_cert

    def _auth_headers(self):
        """Return the HTTP headers that authenticate a request."""
        return {"Authorization": "api_key " + self.api_key}

    @staticmethod
    def _encode_upload_filename(key, filename):
        """Return ``(multipart_filename, extra_request_params)`` for an upload.

        ASCII file names are sent unchanged. Any other name is sent
        base64-encoded (UTF-8) in an extra ``<key>name_b64enc`` parameter and
        the multipart file name becomes a ``@param=`` reference to it
        (the original code refers to DEV-1820 for this scheme).
        """
        try:
            # On Python 3 ``str`` has no decode(); encode() is the ASCII probe.
            filename.encode("ASCII")
        except (UnicodeDecodeError, UnicodeEncodeError):
            b64_key = key + "name_b64enc"
            b64_value = base64.b64encode(filename.encode("utf-8"))
            return "@param=%s" % b64_key, {b64_key: b64_value}
        return filename, {}

    @staticmethod
    def _parse_json(result):
        """Decode the JSON body of ``result`` or raise ``ValueError``."""
        try:
            return result.json()
        except ValueError as exc:
            raise ValueError("API returned invalid JSON: %s" % (result.text)) from exc

    def call(self, http_method, api_path, params=None, raw_data=False):
        """Call the VMRay REST API.

        Args:
            http_method: HTTP verb; looked up (lower-cased) as a function of
                the ``requests`` package.
            api_path: Path appended to ``self.server``.
            params: Optional dict of parameters. Dates, numbers and strings
                are sent as strings; a dict with ``filename`` and ``data``
                keys or any object with a ``read`` attribute is sent as a
                file upload.
            raw_data: When true, the request is streamed and the raw
                response object (``result.raw``) is returned unparsed.

        Returns:
            ``result.raw`` if ``raw_data`` is set, otherwise the ``data``
            member of the JSON answer. When the answer carries a
            ``continuation_id``, the follow-up pages are fetched and their
            ``data`` lists are appended.

        Raises:
            VMRayRESTAPIError: On an unsupported parameter type or a non-2xx
                response.
            ValueError: If a response body is not valid JSON.
        """
        method = http_method.lower()
        # get function of requests package
        requests_func = getattr(requests, method)

        # sort parameters into plain request values and file uploads
        req_params = {}
        file_params = {}
        if params is not None:
            for key, value in params.items():
                if isinstance(value, (datetime.date, datetime.datetime, float, int, str)):
                    req_params[key] = str(value)
                elif isinstance(value, dict):
                    file_params[key] = (value["filename"], value["data"], "application/octet-stream")
                elif hasattr(value, "read"):
                    filename, extra_params = self._encode_upload_filename(key, os.path.split(value.name)[1])
                    req_params.update(extra_params)
                    file_params[key] = (filename, value, "application/octet-stream")
                else:
                    raise VMRayRESTAPIError("Parameter \"%s\" has unknown type \"%s\"" % (key, type(value)))

        files = file_params if file_params else None

        # POST sends the parameters in the body instead of the query string
        if method == "post":
            req_data = req_params
            req_params = None
        else:
            req_data = None

        result = requests_func(
            self.server + api_path,
            data=req_data,
            params=req_params,
            headers=self._auth_headers(),
            files=files,
            verify=self.verify_cert,
            stream=raw_data,
        )
        handle_rest_api_result(result)

        if raw_data:
            return result.raw

        json_result = self._parse_json(result)

        # if there are no cached elements then return the data
        if "continuation_id" not in json_result:
            return json_result.get("data", None)

        data = json_result["data"]

        # get cached results
        while "continuation_id" in json_result:
            result = requests.get(
                "%s/rest/continuation/%u" % (self.server, json_result["continuation_id"]),
                headers=self._auth_headers(),
                verify=self.verify_cert,
            )
            handle_rest_api_result(result)
            json_result = self._parse_json(result)
            data.extend(json_result["data"])

        return data

View File

@ -19,7 +19,7 @@ from distutils.util import strtobool
import io
import zipfile
from ._vmray.vmray_rest_api import VMRayRESTAPI
from _vmray.vmray_rest_api import VMRayRESTAPI
# Error payload template; presumably returned to MISP when the module fails —
# confirm against this module's handler, which is outside the visible hunk.
misperrors = {'error': 'Error'}
# Attribute types listed under the 'input' and 'output' keys for this module.
mispattributes = {'input': ['attachment', 'malware-sample'], 'output': ['text', 'sha1', 'sha256', 'md5', 'link']}

View File

@ -6,8 +6,6 @@ Import VMRay results.
This version supports importing from different analysis jobs, starting from one sample
(the supplied sample_id).
Requires "vmray_rest_api"
The expansion module vmray_submit and the import module vmray_import form a two-step
process for importing data from VMRay.
You can automate this by setting the PyMISP example script 'vmray_automation'
@ -17,378 +15,72 @@ as a cron job
import json
from ._vmray.vmray_rest_api import VMRayRESTAPI
from _vmray.parser import VMRayParser, VMRayParseError
# Shared error payload: handler() overwrites the 'error' message and returns this dict.
misperrors = {'error': 'Error'}
# Copied into the module setup dict by introspection().
inputSource = []
# NOTE(review): moduleinfo is opened twice below (version '0.2' and version '0.4').
# This looks like the removed and the added lines of a diff shown back to back;
# only one of the two headers can remain for the literal to be valid — confirm.
moduleinfo = {'version': '0.2', 'author': 'Koen Van Impe',
'description': 'Import VMRay results',
moduleinfo = {'version': '0.4', 'author': 'Jens Thom (VMRay), Koen van Impe',
'description': 'Import VMRay analysis results from a server',
'module-type': ['import']}
# First userConfig assignment. Its keys (include_analysisid, include_vtidetails,
# sample_id, ...) are the ones handler() reads from request["config"].
# NOTE(review): userConfig is assigned again further down with different keys.
userConfig = {'include_analysisid': {'type': 'Boolean',
'message': 'Include link to VMRay analysis'
},
'include_analysisdetails': {'type': 'Boolean',
'message': 'Include (textual) analysis details'
},
'include_vtidetails': {'type': 'Boolean',
'message': 'Include VMRay Threat Identifier (VTI) rules'
},
'include_imphash_ssdeep': {'type': 'Boolean',
'message': 'Include imphash and ssdeep'
},
'include_extracted_files': {'type': 'Boolean',
'message': 'Include extracted files section'
},
'sample_id': {'type': 'Integer',
'errorMessage': 'Expected a sample ID',
'message': 'The VMRay sample_id'
}
}
# Module description; introspection() stores userConfig under its "userConfig"
# key and returns it.
mispattributes = {
'inputSource': [],
'output': ['MISP objects'],
'format': 'misp_standard',
}
# Configuration key names; version() attaches moduleconfig to moduleinfo['config'].
# NOTE(review): moduleconfig is assigned twice in this span (here and at the end).
moduleconfig = ['apikey', 'url', 'wait_period']
# Second userConfig assignment, keyed by display names.
# NOTE(review): "Sample ID" carries only an "errorMessage" whose text reads like
# a field description; the other entries use "message" — confirm intent.
userConfig = {
"Sample ID": {
"type": "Integer",
"errorMessage": "The VMRay sample ID to download the reports",
},
"VTI": {
"type": "Boolean",
"message": "Include VMRay Threat Identifiers",
"checked": "True"
},
"IOCs": {
"type": "Boolean",
"message": "Include IOCs",
"checked": "True"
},
"Artifacts": {
"type": "Boolean",
"message": "Include other Artifacts",
},
"Analysis Details": {
"type": "Boolean",
"message": "Include Analysis Details",
"checked": "True"
}
}
moduleconfig = ["apikey", "url", "disable_tags", "disable_misp_objects", "ignore_analysis_finished"]
# Module entry point: decodes the JSON request string `q` and returns either the
# collected results / parsed event or the shared `misperrors` dict.
# NOTE(review): this body appears to interleave two revisions of the handler as
# rendered by a diff view — one that queries the REST API through the vmray*
# helper functions, and one that delegates to VMRayParser. Indentation is lost,
# so the real control flow cannot be reconstructed from this span alone; confirm
# against the actual file before changing any statement.
def handler(q=False):
# The include_* flags are module globals because the vmray* helper functions read them.
global include_analysisid, include_imphash_ssdeep, include_extracted_files, include_analysisdetails, include_vtidetails, include_static_to_ids
if q is False:
return False
request = json.loads(q)
# Each flag is read with .get(); a missing key yields None and int(None) raises TypeError.
include_analysisid = bool(int(request["config"].get("include_analysisid")))
include_imphash_ssdeep = bool(int(request["config"].get("include_imphash_ssdeep")))
include_extracted_files = bool(int(request["config"].get("include_extracted_files")))
# NOTE(review): reads "include_extracted_files" — looks like a copy/paste slip
# for "include_analysisdetails"; confirm.
include_analysisdetails = bool(int(request["config"].get("include_extracted_files")))
include_vtidetails = bool(int(request["config"].get("include_vtidetails")))
include_static_to_ids = True
# print("include_analysisid: %s include_imphash_ssdeep: %s include_extracted_files: %s include_analysisdetails: %s include_vtidetails: %s" % ( include_analysisid, include_imphash_ssdeep, include_extracted_files, include_analysisdetails, include_vtidetails))
sample_id = int(request["config"].get("sample_id"))
if (request["config"].get("apikey") is None) or (request["config"].get("url") is None):
misperrors["error"] = "Missing API key or server URL (hint: try cloud.vmray.com)"
return misperrors
if sample_id > 0:
# NOTE(review): the next two lines belong to the VMRayParser-based revision.
parser = VMRayParser()
try:
# Third positional argument is verify_cert; False is passed here.
api = VMRayRESTAPI(request["config"].get("url"), request["config"].get("apikey"), False)
vmray_results = {'results': []}
# Get all information on the sample, returns a set of finished analyze jobs
data = vmrayGetInfoAnalysis(api, sample_id)
if data["data"]:
for analysis in data["data"]:
analysis_id = int(analysis["analysis_id"])
if analysis_id > 0:
# Get the details for an analyze job
analysis_data = vmrayDownloadAnalysis(api, analysis_id)
if analysis_data:
# Each section below appends its helper's results to vmray_results["results"].
if include_analysisdetails and "analysis_details" in analysis_data:
analysis_details = vmrayAnalysisDetails(analysis_data["analysis_details"], analysis_id)
if analysis_details and len(analysis_details["results"]) > 0:
vmray_results = {'results': vmray_results["results"] + analysis_details["results"]}
if "classifications" in analysis_data:
classifications = vmrayClassifications(analysis_data["classifications"], analysis_id)
if classifications and len(classifications["results"]) > 0:
vmray_results = {'results': vmray_results["results"] + classifications["results"]}
if include_extracted_files and "extracted_files" in analysis_data:
extracted_files = vmrayExtractedfiles(analysis_data["extracted_files"])
if extracted_files and len(extracted_files["results"]) > 0:
vmray_results = {'results': vmray_results["results"] + extracted_files["results"]}
if include_vtidetails and "vti" in analysis_data:
vti = vmrayVti(analysis_data["vti"])
if vti and len(vti["results"]) > 0:
vmray_results = {'results': vmray_results["results"] + vti["results"]}
if "artifacts" in analysis_data:
artifacts = vmrayArtifacts(analysis_data["artifacts"])
if artifacts and len(artifacts["results"]) > 0:
vmray_results = {'results': vmray_results["results"] + artifacts["results"]}
if include_analysisid:
a_id = {'results': []}
# The link is assembled from three pieces; url3 is a plain literal (its %2F is not a format code).
url1 = request["config"].get("url") + "/user/analysis/view?from_sample_id=%u" % sample_id
url2 = "&id=%u" % analysis_id
url3 = "&sub=%2Freport%2Foverview.html"
a_id["results"].append({"values": url1 + url2 + url3, "types": "link"})
vmray_results = {'results': vmray_results["results"] + a_id["results"]}
# Clean up (remove doubles)
if len(vmray_results["results"]) > 0:
vmray_results = vmrayCleanup(vmray_results)
return vmray_results
else:
misperrors['error'] = "No vti_results returned or jobs not finished"
# NOTE(review): parser-based revision — load the report via the API config, parse it,
# and turn a VMRayParseError into the module's error dict.
parser.from_api(request["config"])
parser.parse()
except VMRayParseError as exc:
misperrors["error"] = str(exc)
return misperrors
else:
if "result" in data:
if data["result"] == "ok":
return vmray_results
# Fallback
misperrors['error'] = "Unable to fetch sample id %u" % (sample_id)
return misperrors
except Exception as e: # noqa
misperrors['error'] = "Unable to access VMRay API : %s" % (e)
return misperrors
else:
misperrors['error'] = "Not a valid sample id"
return misperrors
# NOTE(review): parser-based revision — return the parsed event.
event = parser.to_json()
return event
# Describes the module's inputs and user options.
# NOTE(review): two variants appear back to back here (one builds `modulesetup`,
# the other extends and returns `mispattributes`). As written, the statements
# after the first `return` would never run; this looks like interleaved diff
# output — confirm which variant is current.
def introspection():
modulesetup = {}
# A bare name lookup raises NameError when the global is not defined, so each
# setting is only copied if it exists.
try:
userConfig
modulesetup['userConfig'] = userConfig
except NameError:
pass
try:
inputSource
modulesetup['inputSource'] = inputSource
except NameError:
pass
return modulesetup
mispattributes["userConfig"] = userConfig
return mispattributes
def version():
    """Attach the configuration key list to ``moduleinfo`` and return it."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
def vmrayGetInfoAnalysis(api, sample_id):
    """Fetch the analyses recorded for a sample.

    Requests ``/rest/analysis/sample/<sample_id>`` as raw data through
    ``api.call`` and returns the decoded JSON document. Returns ``False``
    when ``sample_id`` is falsy.
    """
    if not sample_id:
        return False

    raw_response = api.call("GET", "/rest/analysis/sample/%u" % (sample_id), raw_data=True)
    body = raw_response.read().decode()
    return json.loads(body)
def vmrayDownloadAnalysis(api, analysis_id):
    """Download and decode ``summary.json`` for one analysis.

    Returns the parsed JSON document, ``False`` when ``analysis_id`` is
    falsy, or the shared ``misperrors`` dict (with its message updated) when
    the download or decoding fails for any reason.
    """
    if not analysis_id:
        return False

    try:
        raw_response = api.call("GET", "/rest/analysis/%u/archive/logs/summary.json" % (analysis_id), raw_data=True)
        return json.loads(raw_response.read().decode())
    except Exception:  # noqa
        # Any failure is reported through the module-wide error dict.
        misperrors['error'] = "Unable to download summary.json for analysis %s" % (analysis_id)
        return misperrors
def vmrayVti(vti):
    """Collect the VMRay Threat Identifier (VTI) rules matched by an analysis.

    Every entry of ``vti["vti_rule_matches"]`` that has an ``operation_desc``
    becomes one ``text`` result carrying that description. Returns ``False``
    when ``vti`` is falsy.
    """
    if not vti:
        return False

    matches = []
    if any(section == "vti_rule_matches" for section in vti):
        matches = [
            {'types': ["text"], 'values': rule_match["operation_desc"], 'comment': ""}
            for rule_match in vti["vti_rule_matches"]
            if "operation_desc" in rule_match
        ]
    return {'results': matches}
def vmrayExtractedfiles(extracted_files):
    """Build ``filename|<hash>`` results for the files extracted by an analysis.

    Args:
        extracted_files: Iterable of dicts from the ``extracted_files``
            section of the summary. The keys read are ``file_type``,
            ``norm_filename`` and the ``*_hash`` / ``imp_hash`` fields.

    Returns:
        ``{'results': [...]}`` with one entry per hash that is present and
        not ``None``, or ``False`` when ``extracted_files`` is falsy.

    The imphash and ssdeep entries are only emitted when the module global
    ``include_imphash_ssdeep`` is true; ``to_ids`` is taken from the module
    global ``include_static_to_ids`` (both are set by ``handler``).
    """
    if not extracted_files:
        return False

    # (summary key, MISP attribute type, emitted only with include_imphash_ssdeep)
    hash_fields = (
        ("md5_hash", "filename|md5", False),
        ("imp_hash", "filename|imphash", True),
        ("sha1_hash", "filename|sha1", False),
        ("sha256_hash", "filename|sha256", False),
        ("ssdeep_hash", "filename|ssdeep", True),
    )

    r = {'results': []}
    for entry in extracted_files:
        # Both keys must be present. The previous test
        # ``"file_type" and "norm_filename" in entry`` only checked the second
        # key and raised KeyError when "file_type" was missing.
        if "file_type" in entry and "norm_filename" in entry:
            comment = "%s - %s" % (entry["file_type"], entry["norm_filename"])
        else:
            comment = ""

        # Use the last component of the Windows path; fall back to a
        # placeholder when there is no path separator or no name at all.
        attr_filename = "vmray_sample"
        if "norm_filename" in entry:
            path_parts = entry["norm_filename"].rsplit("\\", 1)
            if len(path_parts) > 1:
                attr_filename = path_parts[-1]

        for key, attr_type, optional in hash_fields:
            if entry.get(key) is None:
                continue
            if optional and not include_imphash_ssdeep:
                continue
            r['results'].append({
                'types': [attr_type],
                'values': '{}|{}'.format(attr_filename, entry[key]),
                'comment': comment,
                'categories': ['Payload delivery', 'Artifacts dropped'],
                'to_ids': include_static_to_ids,
            })
    return r
def vmrayClassifications(classification, analysis_id):
    """Summarise the classifications of an analysis as one ``text`` result.

    The classification values are joined with ", " into a single string.
    ``analysis_id`` is accepted but not used. Returns ``False`` when
    ``classification`` is falsy.
    """
    if not classification:
        return False

    joined = ", ".join(map(str, classification))
    entry = {'types': ["text"], 'values': "Classification : %s " % (joined), 'comment': ""}
    return {'results': [entry]}
def vmrayAnalysisDetails(details, analysis_id):
    """Report general facts about an analysis as ``text`` results.

    One result is produced for each of ``execution_successful``,
    ``termination_reason`` and ``result_str`` that is present in ``details``,
    in that order. Returns ``False`` when ``details`` is falsy.
    """
    if not details:
        return False

    # (key in details, label used in the generated text)
    reported_fields = (
        ("execution_successful", "execution_successful"),
        ("termination_reason", "termination_reason"),
        ("result_str", "result"),
    )
    types = ["text"]
    collected = []
    for key, label in reported_fields:
        if key not in details:
            continue
        text = "Analysis %s : %s : %s " % (analysis_id, label, str(details[key]))
        collected.append({'types': types, 'values': text, 'comment': ""})
    return {'results': collected}
def _vmray_joined_comment(prefix, element, key):
    """Return ``prefix`` plus the ", "-joined items of ``element[key]``, or "" if the key is absent."""
    if key not in element:
        return ""
    return prefix + ", ".join(str(item) for item in element[key])


def _vmray_artifact_file_results(element):
    """Return the result entries for one entry of the "files" artifact list."""
    filename_values = element["filename"]
    # Keep only the last component of a Windows path for the filename|hash pairs.
    path_parts = filename_values.rsplit("\\", 1)
    attr_filename = path_parts[-1] if len(path_parts) > 1 else ""
    # "operations" is optional here, as it already is for mutexes, registry
    # keys and URLs; previously a missing key raised KeyError.
    comment = _vmray_joined_comment("File operations: ", element, "operations")

    results = [{'types': ["filename"], 'values': filename_values, 'comment': comment}]

    # (hash key, MISP attribute type, emitted only with include_imphash_ssdeep)
    hash_fields = (
        ("md5_hash", "filename|md5", False),
        ("imp_hash", "filename|imphash", True),
        ("sha1_hash", "filename|sha1", False),
        ("sha256_hash", "filename|sha256", False),
        ("ssdeep_hash", "filename|ssdeep", True),
    )
    # Run through all hashes
    for file_hash in element.get("hashes", ()):
        for key, attr_type, optional in hash_fields:
            if file_hash.get(key) is None:
                continue
            if optional and not include_imphash_ssdeep:
                continue
            results.append({
                'types': [attr_type],
                'values': '{}|{}'.format(attr_filename, file_hash[key]),
                'comment': comment,
                'categories': ['Payload delivery', 'Artifacts dropped'],
                'to_ids': include_static_to_ids,
            })
    return results


def vmrayArtifacts(patterns):
    """Convert the artifacts (IOCs) seen during an analysis into results.

    Args:
        patterns: Mapping of artifact kind to a list of entries. The kinds
            handled are ``domains``, ``files``, ``ips``, ``mutexes``,
            ``registry`` and ``urls``; other keys are ignored.

    Returns:
        ``{'results': [...]}`` with duplicate entries removed and first-seen
        order kept, or ``False`` when ``patterns`` is falsy.

    ``to_ids`` comes from the module global ``include_static_to_ids``, except
    for registry keys whose only operation is "access", which get ``False``.
    """
    if not patterns:
        return False

    # kind -> (value key, MISP types, key holding the comment items, comment prefix)
    simple_kinds = {
        "domains": ("domain", ["domain", "hostname"], "sources", "Found in: "),
        "ips": ("ip_address", ["ip-dst"], "sources", "Found in: "),
        "mutexes": ("mutex_name", ["mutex"], "operations", "Operations: "),
        "registry": ("reg_key_name", ["regkey"], "operations", "Operations: "),
        "urls": ("url", ["url"], "operations", "Operations: "),
    }

    collected = []
    for pattern in patterns:
        if pattern == "files":
            for el in patterns[pattern]:
                collected.extend(_vmray_artifact_file_results(el))
        elif pattern in simple_kinds:
            value_key, types, comment_key, prefix = simple_kinds[pattern]
            for el in patterns[pattern]:
                to_ids = include_static_to_ids
                # A registry key that was merely accessed is not flagged for IDS.
                if pattern == "registry" and el.get("operations") == ["access"]:
                    to_ids = False
                collected.append({
                    'types': list(types),
                    'values': el[value_key],
                    'comment': _vmray_joined_comment(prefix, el, comment_key),
                    'to_ids': to_ids,
                })

    # Remove doubles (entries are dicts holding lists, so they cannot go into a set)
    unique = []
    for el in collected:
        if el not in unique:
            unique.append(el)
    return {'results': unique}
def vmrayCleanup(x):
    """Return a new results dict without duplicate entries.

    The first occurrence of each entry in ``x["results"]`` is kept and the
    original order is preserved.
    """
    unique = []
    for candidate in x["results"]:
        if candidate in unique:
            continue
        unique.append(candidate)
    return {'results': unique}

View File

@ -0,0 +1,80 @@
import json
from _vmray.parser import VMRayParser, VMRayParseError
# Shared error payload; handler() fills in the message before returning it.
misperrors = {"error": "Error"}

# Configuration key names attached to moduleinfo by version().
moduleconfig = ["disable_tags"]

# Module metadata returned by version().
moduleinfo = {
    "version": "0.1",
    "author": "VMRay",
    "description": "Import a VMRay Summary JSON report.",
    "module-type": ["import"],
}

# Module description returned by introspection().
mispattributes = {
    "inputSource": ["file"],
    "output": ["MISP objects", "MISP attributes"],
    "format": "misp_standard",
}

# Per-import options; introspection() stores them under "userConfig".
user_config = {
    "Analysis ID": {
        "type": "Boolean",
        "message": "Include Analysis ID",
        "checked": "True",
    },
    "VTI": {
        "type": "Boolean",
        "message": "Include VMRay Threat Identifiers",
        "checked": "True",
    },
    "IOCs": {
        "type": "Boolean",
        "message": "Include IOCs",
        "checked": "True",
    },
    "Artifacts": {
        "type": "Boolean",
        "message": "Include other Artifacts",
    },
    "Analysis Details": {
        "type": "Boolean",
        "message": "Include Analysis Details",
    },
    "Attach Report": {
        "type": "Boolean",
        "message": "Include the original imported file as attachment",
    },
}
def handler(q=False):
    """Import a VMRay Summary JSON report supplied in a module request.

    Args:
        q: JSON-encoded request string with the members ``config``, ``data``
            (passed to the parser as a base64 string) and ``filename``, or
            ``False`` when there is no request.

    Returns:
        ``False`` when ``q`` is ``False``; the shared ``misperrors`` dict when
        a required member is missing or the parser raises
        ``VMRayParseError``; otherwise the value of ``parser.to_json()``.
    """
    # In case there's no data
    if q is False:
        return False

    request = json.loads(q)

    # Report an incomplete request through the usual error dict rather than
    # letting a bare KeyError escape from the handler.
    missing = [field for field in ("config", "data", "filename") if field not in request]
    if missing:
        misperrors["error"] = "Missing required request field(s): %s" % ", ".join(missing)
        return misperrors

    parser = VMRayParser()
    try:
        parser.from_base64_string(request["config"], request["data"], request["filename"])
        parser.parse()
    except VMRayParseError as exc:
        misperrors["error"] = str(exc)
        return misperrors

    return parser.to_json()
def introspection():
    """Return the module description with the user options stored under "userConfig"."""
    mispattributes.update(userConfig=user_config)
    return mispattributes
def version():
    """Return the module metadata with the configuration key list stored under "config"."""
    moduleinfo.update({"config": moduleconfig})
    return moduleinfo