misp-modules/misp_modules/modules/expansion/virustotal.py

315 lines
14 KiB
Python

from urllib.parse import urlparse
import vt
from . import check_input_attribute, standard_error_message
from pymisp import MISPAttribute, MISPEvent, MISPObject
# Shared error payload; handlers overwrite the 'error' message before returning it.
misperrors = dict(error='Error')

# Attribute types accepted as input and the output format, as returned by introspection().
mispattributes = {
    'input': [
        'hostname',
        'domain',
        'ip-src',
        'ip-dst',
        'md5',
        'sha1',
        'sha256',
        'url',
        'ip-src|port',
        'ip-dst|port',
    ],
    'format': 'misp_standard',
}
# Module metadata returned by version().
# possible module-types: 'expansion', 'hover' or both
moduleinfo = {
    'version': '6',
    'author': 'Hannah Ward',
    'description': 'Enrich observables with the VirusTotal v3 API',
    'module-type': ['expansion'],
    'name': 'VirusTotal v3 Lookup',
    'logo': 'virustotal.png',
    'requirements': ['An access to the VirusTotal API (apikey), with a high request rate limit.'],
    'features': 'New format of modules able to return attributes and objects.\n\nA module to take a MISP attribute as input and query the VirusTotal API to get additional data about it.\n\nCompared to the [standard VirusTotal expansion module](https://github.com/MISP/misp-modules/blob/main/misp_modules/modules/expansion/virustotal_public.py), this module is made for advanced parsing of VirusTotal report, with a recursive analysis of the elements found after the first request.\n\nThus, it requires a higher request rate limit to avoid the API to return a 204 error (Request rate limit exceeded), and the data parsed from the different requests are returned as MISP attributes and objects, with the corresponding relations between each one of them.',
    'references': ['https://www.virustotal.com/', 'https://docs.virustotal.com/reference/overview'],
    # Kept in sync with mispattributes['input']: sha512 is not an accepted
    # input type, while url and ip|port attributes are.
    'input': 'A domain, hostname, IP address (optionally with a port), URL or hash (md5, sha1 or sha256) attribute.',
    'output': 'MISP attributes and objects resulting from the parsing of the VirusTotal report concerning the input attribute.',
}
# Configuration keys this module expects the site admin to provide.
moduleconfig = [
    'apikey',
    'event_limit',
    'proxy_host',
    'proxy_port',
    'proxy_username',
    'proxy_password',
]

# Limit passed to the VirusTotal relationship iterators when no limit is given to the parser.
DEFAULT_RESULTS_LIMIT = 10
class VirusTotalParser:
    """Query the VirusTotal v3 API and turn the reports into MISP attributes and objects.

    Everything that is parsed is accumulated in ``self.misp_event``; call
    ``query_api`` with the input attribute, then ``get_result`` to retrieve
    the collected attributes and objects.
    """

    # File relationships queried for both domains and URLs, as
    # (VirusTotal relationship name, MISP reference type) pairs.
    _FILE_RELATIONSHIPS = (
        ('communicating_files', 'communicates-with'),
        ('downloaded_files', 'downloaded-from'),
        ('referrer_files', 'referring'),
    )

    def __init__(self, client: vt.Client, limit: int) -> None:
        """Store the API client and the per-relationship result limit.

        A falsy ``limit`` (``None`` or ``0``) falls back to ``DEFAULT_RESULTS_LIMIT``.
        """
        self.client = client
        self.limit = limit or DEFAULT_RESULTS_LIMIT
        self.misp_event = MISPEvent()
        self.attribute = MISPAttribute()
        self.parsed_objects = {}
        # Dispatch table: MISP attribute type -> parsing method.
        self.input_types_mapping = {
            'ip-src': self.parse_ip, 'ip-dst': self.parse_ip,
            'domain': self.parse_domain, 'hostname': self.parse_domain,
            'md5': self.parse_hash, 'sha1': self.parse_hash,
            'sha256': self.parse_hash, 'url': self.parse_url,
            'ip-src|port': self.parse_ip_port, 'ip-dst|port': self.parse_ip_port,
        }
        self.proxies = None

    @staticmethod
    def get_total_analysis(analysis: dict, known_distributors: dict = None) -> int:
        """Return the denominator used for the detection ratio.

        Sums the 'undetected', 'suspicious' and 'harmless' counters; the
        'malicious' counter is added only when ``known_distributors`` is
        falsy. Returns 0 when ``analysis`` is empty or ``None``.
        """
        if not analysis:
            return 0
        count = analysis['undetected'] + analysis['suspicious'] + analysis['harmless']
        if known_distributors:
            return count
        return count + analysis['malicious']

    def query_api(self, attribute: dict) -> None:
        """Load the input attribute and run the parser registered for its type."""
        self.attribute.from_dict(**attribute)
        self.input_types_mapping[self.attribute.type](self.attribute.value)

    def get_result(self) -> dict:
        """Return the non-empty 'Attribute' and 'Object' lists of the event under 'results'."""
        event = self.misp_event.to_dict()
        results = {key: event[key] for key in ('Attribute', 'Object') if event.get(key)}
        return {'results': results}

    def add_vt_report(self, report: vt.Object) -> str:
        """Add a 'virustotal-report' object for ``report`` to the event and return its UUID.

        The object holds the permalink to the VirusTotal GUI and the detection
        ratio ('-/-' when the report has no 'last_analysis_stats').
        """
        analysis = report.get('last_analysis_stats')
        total = self.get_total_analysis(analysis, report.get('known_distributors'))
        # The GUI path uses 'ip-address' where the API object type is 'ip_address'.
        rtype = 'ip-address' if report.type == 'ip_address' else report.type
        permalink = f'https://www.virustotal.com/gui/{rtype}/{report.id}'
        detection_ratio = f"{analysis['malicious']}/{total}" if analysis else '-/-'

        vt_object = MISPObject('virustotal-report')
        vt_object.add_attribute('permalink', type='link', value=permalink)
        vt_object.add_attribute('detection-ratio', type='text', value=detection_ratio, disable_correlation=True)
        self.misp_event.add_object(**vt_object)
        return vt_object.uuid

    def create_misp_object(self, report: vt.Object) -> MISPObject:
        """Build the MISP object matching ``report`` and link it to its VirusTotal report.

        The returned object is NOT added to the event; callers add it once
        they have attached their own attributes and references. The
        accompanying 'virustotal-report' object is added to the event here.

        Raises:
            ValueError: if ``report.type`` is not 'file', 'domain',
                'ip_address' or 'url'.
        """
        if report.type == 'file':
            misp_object = MISPObject('file')
            for hash_type in ('md5', 'sha1', 'sha256', 'tlsh',
                              'vhash', 'ssdeep', 'imphash'):
                hash_value = report.get(hash_type)
                # Skip the hashes that are absent from the report.
                if hash_value:
                    misp_object.add_attribute(hash_type, type=hash_type, value=hash_value)
        elif report.type == 'domain':
            misp_object = MISPObject('domain-ip')
            misp_object.add_attribute('domain', type='domain', value=report.id)
        elif report.type == 'ip_address':
            misp_object = MISPObject('domain-ip')
            misp_object.add_attribute('ip', type='ip-dst', value=report.id)
        elif report.type == 'url':
            misp_object = MISPObject('url')
            misp_object.add_attribute('url', type='url', value=report.id)
        else:
            # Fail before a report object is added to the event, so that no
            # orphan 'virustotal-report' is left behind.
            raise ValueError(f'Unsupported VirusTotal report type: {report.type}')
        vt_uuid = self.add_vt_report(report)
        misp_object.add_reference(vt_uuid, 'analyzed-with')
        return misp_object

    def _add_related_objects(self, path: str, target_uuid: str, relationship_type: str) -> None:
        """Add one MISP object per report found at ``path``, each referencing ``target_uuid``."""
        for related_report in self.client.iterator(path, limit=self.limit):
            related_object = self.create_misp_object(related_report)
            related_object.add_reference(target_uuid, relationship_type)
            self.misp_event.add_object(**related_object)

    ################################################################################
    ####                         Main parsing functions                         #### # noqa
    ################################################################################

    def parse_domain(self, domain: str) -> str:
        """Parse a domain report and its relationships; return the domain object UUID."""
        domain_report = self.client.get_object(f'/domains/{domain}')
        base_path = f'/domains/{domain_report.id}'

        # DOMAIN
        domain_object = self.create_misp_object(domain_report)

        # WHOIS
        # `get` returns None when the field is missing, where attribute access would raise.
        whois_text = domain_report.get('whois')
        if whois_text:
            whois_object = MISPObject('whois')
            whois_object.add_attribute('text', type='text', value=whois_text)
            self.misp_event.add_object(**whois_object)

        # SIBLINGS AND SUBDOMAINS
        for relationship_name, misp_name in (('siblings', 'sibling-of'), ('subdomains', 'subdomain')):
            for item in self.client.iterator(f'{base_path}/{relationship_name}', limit=self.limit):
                attr = MISPAttribute()
                attr.from_dict(type='domain', value=item.id)
                self.misp_event.add_attribute(**attr)
                domain_object.add_reference(attr.uuid, misp_name)

        # RESOLUTIONS
        for resolution in self.client.iterator(f'{base_path}/resolutions', limit=self.limit):
            domain_object.add_attribute('ip', type='ip-dst', value=resolution.ip_address)

        # COMMUNICATING, DOWNLOADED AND REFERRER FILES
        for relationship_name, misp_name in self._FILE_RELATIONSHIPS:
            self._add_related_objects(f'{base_path}/{relationship_name}', domain_object.uuid, misp_name)

        # URLS
        self._add_related_objects(f'{base_path}/urls', domain_object.uuid, 'hosted-in')

        self.misp_event.add_object(**domain_object)
        return domain_object.uuid

    def parse_hash(self, file_hash: str) -> str:
        """Parse a file report and its relationships; return the file object UUID."""
        file_report = self.client.get_object(f'/files/{file_hash}')
        base_path = f'/files/{file_report.id}'
        file_object = self.create_misp_object(file_report)

        # ITW URLS
        self._add_related_objects(f'{base_path}/itw_urls', file_object.uuid, 'downloaded')

        # CONTACTED URLS, DOMAINS AND IPS
        for relationship_name in ('contacted_urls', 'contacted_domains', 'contacted_ips'):
            self._add_related_objects(f'{base_path}/{relationship_name}', file_object.uuid, 'communicates-with')

        self.misp_event.add_object(**file_object)
        return file_object.uuid

    def parse_ip_port(self, ipport: str) -> str:
        """Parse the IP part of an 'ip|port' value; return the IP object UUID."""
        ip = ipport.split('|')[0]
        return self.parse_ip(ip)

    def parse_ip(self, ip: str) -> str:
        """Parse an IP address report and its relationships; return the IP object UUID."""
        ip_report = self.client.get_object(f'/ip_addresses/{ip}')
        base_path = f'/ip_addresses/{ip_report.id}'

        # IP
        ip_object = self.create_misp_object(ip_report)

        # ASN
        # Fields are read with `get` so that a report missing any of them
        # does not raise; the object is only created when there is data.
        asn_fields = [
            (relation, attribute_type, value)
            for relation, attribute_type, value in (
                ('asn', 'AS', ip_report.get('asn')),
                ('subnet-announced', 'ip-src', ip_report.get('network')),
                ('country', 'text', ip_report.get('country')),
            )
            if value is not None
        ]
        if asn_fields:
            asn_object = MISPObject('asn')
            for relation, attribute_type, value in asn_fields:
                asn_object.add_attribute(relation, type=attribute_type, value=value)
            self.misp_event.add_object(**asn_object)

        # RESOLUTIONS
        for resolution in self.client.iterator(f'{base_path}/resolutions', limit=self.limit):
            ip_object.add_attribute('domain', type='domain', value=resolution.host_name)

        # URLS
        self._add_related_objects(f'{base_path}/urls', ip_object.uuid, 'hosted-in')

        self.misp_event.add_object(**ip_object)
        return ip_object.uuid

    def parse_url(self, url: str) -> str:
        """Parse a URL report and its file relationships; return the URL object UUID."""
        url_id = vt.url_id(url)
        url_report = self.client.get_object(f'/urls/{url_id}')
        url_object = self.create_misp_object(url_report)

        # COMMUNICATING, DOWNLOADED AND REFERRER FILES
        for relationship_name, misp_name in self._FILE_RELATIONSHIPS:
            self._add_related_objects(f'/urls/{url_report.id}/{relationship_name}', url_object.uuid, misp_name)

        self.misp_event.add_object(**url_object)
        return url_object.uuid
def get_proxy_settings(config: dict) -> dict:
    """Return the proxy settings in the requests format.

    Args:
        config: module configuration; the keys 'proxy_host', 'proxy_port',
            'proxy_username' and 'proxy_password' are read.

    Returns:
        A dict with 'http' and 'https' keys both pointing at the proxy URL,
        or None when no proxy host is configured.

    Raises:
        KeyError: if a host is set without a port, or a username is set
            without a password. ``misperrors['error']`` is set to the
            explanation before raising.
    """
    host = config.get('proxy_host')
    if not host:
        return None

    port = config.get('proxy_port')
    if not port:
        misperrors['error'] = 'The virustotal_proxy_host config is set, ' \
                              'please also set the virustotal_proxy_port.'
        raise KeyError(misperrors['error'])

    if '://' in host:
        parsed = urlparse(host)
        # Any http-like scheme (http, https) is normalised to plain http.
        scheme = 'http' if 'http' in parsed.scheme else parsed.scheme
        netloc = parsed.netloc
    else:
        # A bare host name has no netloc once parsed, which used to yield a
        # malformed '://:port' proxy URL; treat it as an http proxy instead.
        scheme = 'http'
        netloc = host.rstrip('/')
    address = f'{netloc}:{port}'

    username = config.get('proxy_username')
    if username:
        password = config.get('proxy_password')
        if not password:
            misperrors['error'] = 'The virustotal_proxy_username config is set, ' \
                                  'please also set the virustotal_proxy_password.'
            raise KeyError(misperrors['error'])
        address = f'{username}:{password}@{address}'

    proxy_url = f'{scheme}://{address}'
    return {'http': proxy_url, 'https': proxy_url}
def parse_error(status_code: int) -> str:
    """Return the error message associated with a VirusTotal HTTP status code.

    Codes without a dedicated message yield a generic "may not be accessible" text.
    """
    known_errors = {
        204: 'VirusTotal request rate limit exceeded.',
        400: 'Incorrect request, please check the arguments.',
        403: "You don't have enough privileges to make the request.",
    }
    return known_errors.get(status_code, "VirusTotal may not be accessible.")
def dict_handler(request: dict):
    """Handle an enrichment request and return the parsed results or an error dict.

    Args:
        request: the module request; 'config' must hold at least 'apikey'
            and 'attribute' must be a supported MISP attribute.

    Returns:
        ``{'results': ...}`` on success, otherwise a dict with an 'error' key.
    """
    if not request.get('config') or not request['config'].get('apikey'):
        misperrors['error'] = 'A VirusTotal api key is required for this module.'
        return misperrors
    if not request.get('attribute') or not check_input_attribute(request['attribute']):
        return {'error': f'{standard_error_message}, which should contain at least a type, a value and an uuid.'}
    if request['attribute']['type'] not in mispattributes['input']:
        return {'error': 'Unsupported attribute type.'}

    config = request['config']
    attribute = request['attribute']

    event_limit = config.get('event_limit')
    try:
        limit = int(event_limit) if event_limit else None
    except (TypeError, ValueError):
        # A non-numeric setting is reported instead of crashing the module.
        misperrors['error'] = 'The event_limit config must be an integer.'
        return misperrors

    try:
        proxy_settings = get_proxy_settings(config)
    except KeyError:
        # get_proxy_settings stores the explanation in misperrors before raising.
        return misperrors

    client = vt.Client(config['apikey'],
                       headers={
                           'x-tool': 'MISPModuleVirusTotalExpansion',
                       },
                       proxy=proxy_settings['http'] if proxy_settings else None)
    try:
        parser = VirusTotalParser(client, limit)
        parser.query_api(attribute)
    except vt.APIError as ex:
        if ex.code == 'ForbiddenError':
            misperrors['error'] = 'ForbiddenError'
        else:
            misperrors['error'] = ex.message
        return misperrors
    finally:
        # Release the client's underlying connections whatever the outcome.
        client.close()
    return parser.get_result()
def introspection():
    """Return the module's accepted input attribute types and output format."""
    supported_attributes = mispattributes
    return supported_attributes
def version():
    """Return the module metadata with the expected config keys stored under 'config'."""
    moduleinfo.update(config=moduleconfig)
    return moduleinfo