PyMISP/pymisp/tools/peobject.py

141 lines
6.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ..exceptions import InvalidMISPObject
from .abstractgenerator import AbstractMISPObjectGenerator
from io import BytesIO
from hashlib import md5, sha1, sha256, sha512
from datetime import datetime
import logging
from typing import Optional, Union
from pathlib import Path
logger = logging.getLogger('pymisp')
try:
import lief # type: ignore
HAS_LIEF = True
except ImportError:
HAS_LIEF = False
try:
import pydeep # type: ignore
HAS_PYDEEP = True
except ImportError:
HAS_PYDEEP = False
class PEObject(AbstractMISPObjectGenerator):
def __init__(self, parsed: Optional[lief.PE.Binary]=None, filepath: Optional[Union[Path, str]]=None, pseudofile: Optional[BytesIO]=None, standalone: bool=True, **kwargs):
# Python3 way
# super().__init__('pe')
super(PEObject, self).__init__('pe', standalone=standalone, **kwargs)
if not HAS_PYDEEP:
logger.warning("Please install pydeep: pip install git+https://github.com/kbandla/pydeep.git")
if not HAS_LIEF:
raise ImportError('Please install lief, documentation here: https://github.com/lief-project/LIEF')
if pseudofile:
if isinstance(pseudofile, BytesIO):
self.__pe = lief.PE.parse(raw=pseudofile.getvalue())
elif isinstance(pseudofile, bytes):
self.__pe = lief.PE.parse(raw=pseudofile)
else:
raise InvalidMISPObject('Pseudo file can be BytesIO or bytes got {}'.format(type(pseudofile)))
elif filepath:
self.__pe = lief.PE.parse(filepath)
elif parsed:
# Got an already parsed blob
if isinstance(parsed, lief.PE.Binary):
self.__pe = parsed
else:
raise InvalidMISPObject('Not a lief.PE.Binary: {}'.format(type(parsed)))
self.generate_attributes()
def _is_exe(self):
if not self._is_dll() and not self._is_driver():
return self.__pe.header.has_characteristic(lief.PE.HEADER_CHARACTERISTICS.EXECUTABLE_IMAGE)
return False
def _is_dll(self):
return self.__pe.header.has_characteristic(lief.PE.HEADER_CHARACTERISTICS.DLL)
def _is_driver(self):
# List from pefile
system_DLLs = set(('ntoskrnl.exe', 'hal.dll', 'ndis.sys', 'bootvid.dll', 'kdcom.dll'))
if system_DLLs.intersection([imp.lower() for imp in self.__pe.libraries]):
return True
return False
def _get_pe_type(self):
if self._is_dll():
return 'dll'
elif self._is_driver():
return 'driver'
elif self._is_exe():
return 'exe'
else:
return 'unknown'
def generate_attributes(self):
self.add_attribute('type', value=self._get_pe_type())
# General information
self.add_attribute('entrypoint-address', value=self.__pe.entrypoint)
self.add_attribute('compilation-timestamp', value=datetime.utcfromtimestamp(self.__pe.header.time_date_stamps).isoformat())
# self.imphash = self.__pe.get_imphash()
try:
if (self.__pe.has_resources and
self.__pe.resources_manager.has_version and
self.__pe.resources_manager.version.has_string_file_info and
self.__pe.resources_manager.version.string_file_info.langcode_items):
fileinfo = dict(self.__pe.resources_manager.version.string_file_info.langcode_items[0].items.items())
self.add_attribute('original-filename', value=fileinfo.get('OriginalFilename'))
self.add_attribute('internal-filename', value=fileinfo.get('InternalName'))
self.add_attribute('file-description', value=fileinfo.get('FileDescription'))
self.add_attribute('file-version', value=fileinfo.get('FileVersion'))
self.add_attribute('lang-id', value=self.__pe.resources_manager.version.string_file_info.langcode_items[0].key)
self.add_attribute('product-name', value=fileinfo.get('ProductName'))
self.add_attribute('product-version', value=fileinfo.get('ProductVersion'))
self.add_attribute('company-name', value=fileinfo.get('CompanyName'))
self.add_attribute('legal-copyright', value=fileinfo.get('LegalCopyright'))
except lief.read_out_of_bound:
# The file is corrupted
pass
# Sections
self.sections = []
if self.__pe.sections:
pos = 0
for section in self.__pe.sections:
s = PESectionObject(section, self._standalone, default_attributes_parameters=self._default_attributes_parameters)
self.add_reference(s.uuid, 'includes', 'Section {} of PE'.format(pos))
if ((self.__pe.entrypoint >= section.virtual_address) and
(self.__pe.entrypoint < (section.virtual_address + section.virtual_size))):
self.add_attribute('entrypoint-section-at-position', value='{}|{}'.format(section.name, pos))
pos += 1
self.sections.append(s)
self.add_attribute('number-sections', value=len(self.sections))
# TODO: TLSSection / DIRECTORY_ENTRY_TLS
class PESectionObject(AbstractMISPObjectGenerator):
def __init__(self, section: lief.PE.Section, standalone: bool=True, **kwargs):
# Python3 way
# super().__init__('pe-section')
super(PESectionObject, self).__init__('pe-section', standalone=standalone, **kwargs)
self.__section = section
self.__data = bytes(self.__section.content)
self.generate_attributes()
def generate_attributes(self):
self.add_attribute('name', value=self.__section.name)
size = self.add_attribute('size-in-bytes', value=self.__section.size)
if int(size.value) > 0:
self.add_attribute('entropy', value=self.__section.entropy)
self.add_attribute('md5', value=md5(self.__data).hexdigest())
self.add_attribute('sha1', value=sha1(self.__data).hexdigest())
self.add_attribute('sha256', value=sha256(self.__data).hexdigest())
self.add_attribute('sha512', value=sha512(self.__data).hexdigest())
if HAS_PYDEEP:
self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode())